R set up

Activate renv to create renv.lock file to track dependencies for data reproducibility

renv::init()

If downloading project from github and want to restore previous dependencies used for analyses run this line

renv::restore()

Download packages so stored in renv.lock file

# To create Pdf
install.packages("knitr")

# Importing Data
install.packages("haven")
install.packages("readxl")

# Table specific
install.packages("xtable")
install.packages("kableExtra")

# Data Cleaning and plotting
install.packages("tidyverse")
install.packages("dplyr")
install.packages("plyr")
install.packages("ggplot2")
install.packages("scales")
install.packages("gridExtra")

# Skewness install.packages
install.packages("e1071")

# Matrix operations
install.packages("matrixStats")

# Mixed Models
install.packages("lme4")
install.packages("nlme")
#install.packages("HLMdiag") # Model diagnostics

# To read the lables
install.packages("Hmisc")

# to order the data
install.packages("doBy")

# to re-code variables
install.packages("rockchalk")

# for reshape
install.packages("MASS")

# descriptive and more
install.packages("psych")

# correlogram plot
install.packages("corrplot")

#glmer diagnostics
install.packages("fitdistrplus")

#outlier analysis and R^2
install.packages("performance")
install.packages("rptR")

# Creating stratified tables
install.packages("arsenal")

#For zero-inflated negative binomial modeling/model checking
install.packages("remotes")
library(remotes)
install_github("nyiuab/NBZIMM", force=T, build_vignettes=F)
install.packages("philentropy")

Load in libraries

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## ------------------------------------------------------------------------------
## 
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## 
## ------------------------------------------------------------------------------
## 
## 
## Attaching package: 'plyr'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compact
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## 
## 
## Attaching package: 'gridExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## 
## 
## Attaching package: 'matrixStats'
## 
## 
## The following object is masked from 'package:plyr':
## 
##     count
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## 
## Attaching package: 'nlme'
## 
## 
## The following object is masked from 'package:lme4':
## 
##     lmList
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following object is masked from 'package:e1071':
## 
##     impute
## 
## 
## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:xtable':
## 
##     label, label<-
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## 
## Attaching package: 'doBy'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     order_by
## 
## 
## 
## Attaching package: 'rockchalk'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     summarize
## 
## 
## The following objects are masked from 'package:e1071':
## 
##     kurtosis, skewness
## 
## 
## The following object is masked from 'package:plyr':
## 
##     summarize
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     summarize
## 
## 
## 
## Attaching package: 'MASS'
## 
## 
## The following object is masked from 'package:rockchalk':
## 
##     mvrnorm
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:scales':
## 
##     alpha, rescale
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## corrplot 0.92 loaded
## 
## Loading required package: survival
## 
## 
## Attaching package: 'performance'
## 
## 
## The following object is masked from 'package:xtable':
## 
##     display
## 
## 
## 
## Attaching package: 'arsenal'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     %nin%
## 
## 
## The following object is masked from 'package:matrixStats':
## 
##     iqr
## 
## 
## The following object is masked from 'package:scales':
## 
##     ordinal
## 
## 
## The following object is masked from 'package:lubridate':
## 
##     is.Date
## 
## 
## 
## Attaching package: 'NBZIMM'
## 
## 
## The following object is masked from 'package:psych':
## 
##     sim
## 
## 
## The following object is masked from 'package:stringr':
## 
##     fixed
## 
## 
## 
## Attaching package: 'philentropy'
## 
## 
## The following objects are masked from 'package:psych':
## 
##     distance, manhattan, minkowski

Importing the Clean Data

Note that the data cleaning and exploration for this analysis is in a separate file written by Hedyeh Ahmadi.

HEI_Aim2_Long <- read.csv("HEI_Aim2_Long.csv")
HEI_Aim2_Wide <- read.csv("HEI_Aim2_Wide.csv")

HEI_Aim2_Long_1KidPerFamily <- read.csv("HEI_Aim2_Long_1KidPerFamily.csv")
dim(HEI_Aim2_Long)
## [1] 30419    56
dim(HEI_Aim2_Wide)
## [1] 11866   140
dim(HEI_Aim2_Long_1KidPerFamily)
## [1] 25125    56
names(HEI_Aim2_Long)
##  [1] "X"                               "subjectid"                      
##  [3] "rel_family_id"                   "abcd_site"                      
##  [5] "eventname"                       "rel_relationship"               
##  [7] "interview_date"                  "demo_l_p_select_language___1"   
##  [9] "cbcl_select_language___1"        "rel_group_id"                   
## [11] "rel_ingroup_order"               "rel_same_sex"                   
## [13] "reshist_addr1_pm252016aa"        "sex"                            
## [15] "interview_age"                   "race_ethnicity"                 
## [17] "high.educ"                       "reshist_addr1_adi_perc"         
## [19] "reshist_addr1_adi_wsum"          "overall.income.b"               
## [21] "overall.income.l"                "overall.income.alltp"           
## [23] "prnt.empl.bl"                    "prnt.empl.l"                    
## [25] "prnt.empl.alltp"                 "neighb_phenx_avg_p"             
## [27] "neighb_phenx_sum_p"              "reshist_addr1_popdensity"       
## [29] "reshist_addr1_proxrd"            "married"                        
## [31] "married.or.livingtogether"       "cbcl_scr_syn_internal_r"        
## [33] "cbcl_scr_syn_external_r"         "cbcl_scr_syn_totprob_r"         
## [35] "cbcl_scr_syn_anxdep_r"           "cbcl_scr_syn_withdep_r"         
## [37] "cbcl_scr_syn_attention_r"        "cbcl_scr_syn_rulebreak_r"       
## [39] "cbcl_scr_syn_aggressive_r"       "cbcl_scr_syn_internal_t"        
## [41] "cbcl_scr_syn_external_t"         "cbcl_scr_syn_totprob_t"         
## [43] "cbcl_scr_syn_anxdep_t"           "cbcl_scr_syn_withdep_t"         
## [45] "cbcl_scr_syn_attention_t"        "cbcl_scr_syn_rulebreak_t"       
## [47] "cbcl_scr_syn_aggressive_t"       "reshist_addr1_years"            
## [49] "income_midp"                     "demo_comb_income_v2"            
## [51] "reshist_addr1_no2_2016_aavg"     "reshist_addr1_o3_2016_annavg"   
## [53] "reshist_addr1_pm252016aa_bl"     "reshist_addr1_no2_2016_aavg_bl" 
## [55] "reshist_addr1_o3_2016_annavg_bl" "high.educ_bl"
names(HEI_Aim2_Wide)
##   [1] "X"                                    
##   [2] "subjectid"                            
##   [3] "rel_family_id"                        
##   [4] "rel_group_id"                         
##   [5] "rel_same_sex"                         
##   [6] "sex"                                  
##   [7] "race_ethnicity"                       
##   [8] "reshist_addr1_pm252016aa.Baseline"    
##   [9] "abcd_site.Baseline"                   
##  [10] "interview_date.Baseline"              
##  [11] "rel_relationship.Baseline"            
##  [12] "rel_ingroup_order.Baseline"           
##  [13] "high.educ.Baseline"                   
##  [14] "interview_age.Baseline"               
##  [15] "demo_l_p_select_language___1.Baseline"
##  [16] "cbcl_select_language___1.Baseline"    
##  [17] "reshist_addr1_adi_perc.Baseline"      
##  [18] "reshist_addr1_adi_wsum.Baseline"      
##  [19] "overall.income.b.Baseline"            
##  [20] "overall.income.l.Baseline"            
##  [21] "overall.income.alltp.Baseline"        
##  [22] "prnt.empl.bl.Baseline"                
##  [23] "prnt.empl.l.Baseline"                 
##  [24] "prnt.empl.alltp.Baseline"             
##  [25] "neighb_phenx_avg_p.Baseline"          
##  [26] "neighb_phenx_sum_p.Baseline"          
##  [27] "reshist_addr1_popdensity.Baseline"    
##  [28] "reshist_addr1_proxrd.Baseline"        
##  [29] "married.Baseline"                     
##  [30] "married.or.livingtogether.Baseline"   
##  [31] "cbcl_scr_syn_internal_r.Baseline"     
##  [32] "cbcl_scr_syn_external_r.Baseline"     
##  [33] "cbcl_scr_syn_totprob_r.Baseline"      
##  [34] "cbcl_scr_syn_anxdep_r.Baseline"       
##  [35] "cbcl_scr_syn_withdep_r.Baseline"      
##  [36] "cbcl_scr_syn_attention_r.Baseline"    
##  [37] "cbcl_scr_syn_rulebreak_r.Baseline"    
##  [38] "cbcl_scr_syn_aggressive_r.Baseline"   
##  [39] "cbcl_scr_syn_internal_t.Baseline"     
##  [40] "cbcl_scr_syn_external_t.Baseline"     
##  [41] "cbcl_scr_syn_totprob_t.Baseline"      
##  [42] "cbcl_scr_syn_anxdep_t.Baseline"       
##  [43] "cbcl_scr_syn_withdep_t.Baseline"      
##  [44] "cbcl_scr_syn_attention_t.Baseline"    
##  [45] "cbcl_scr_syn_rulebreak_t.Baseline"    
##  [46] "cbcl_scr_syn_aggressive_t.Baseline"   
##  [47] "reshist_addr1_years.Baseline"         
##  [48] "income_midp.Baseline"                 
##  [49] "demo_comb_income_v2.Baseline"         
##  [50] "reshist_addr1_no2_2016_aavg.Baseline" 
##  [51] "reshist_addr1_o3_2016_annavg.Baseline"
##  [52] "reshist_addr1_pm252016aa.1.year"      
##  [53] "abcd_site.1.year"                     
##  [54] "interview_date.1.year"                
##  [55] "rel_relationship.1.year"              
##  [56] "rel_ingroup_order.1.year"             
##  [57] "high.educ.1.year"                     
##  [58] "interview_age.1.year"                 
##  [59] "demo_l_p_select_language___1.1.year"  
##  [60] "cbcl_select_language___1.1.year"      
##  [61] "reshist_addr1_adi_perc.1.year"        
##  [62] "reshist_addr1_adi_wsum.1.year"        
##  [63] "overall.income.b.1.year"              
##  [64] "overall.income.l.1.year"              
##  [65] "overall.income.alltp.1.year"          
##  [66] "prnt.empl.bl.1.year"                  
##  [67] "prnt.empl.l.1.year"                   
##  [68] "prnt.empl.alltp.1.year"               
##  [69] "neighb_phenx_avg_p.1.year"            
##  [70] "neighb_phenx_sum_p.1.year"            
##  [71] "reshist_addr1_popdensity.1.year"      
##  [72] "reshist_addr1_proxrd.1.year"          
##  [73] "married.1.year"                       
##  [74] "married.or.livingtogether.1.year"     
##  [75] "cbcl_scr_syn_internal_r.1.year"       
##  [76] "cbcl_scr_syn_external_r.1.year"       
##  [77] "cbcl_scr_syn_totprob_r.1.year"        
##  [78] "cbcl_scr_syn_anxdep_r.1.year"         
##  [79] "cbcl_scr_syn_withdep_r.1.year"        
##  [80] "cbcl_scr_syn_attention_r.1.year"      
##  [81] "cbcl_scr_syn_rulebreak_r.1.year"      
##  [82] "cbcl_scr_syn_aggressive_r.1.year"     
##  [83] "cbcl_scr_syn_internal_t.1.year"       
##  [84] "cbcl_scr_syn_external_t.1.year"       
##  [85] "cbcl_scr_syn_totprob_t.1.year"        
##  [86] "cbcl_scr_syn_anxdep_t.1.year"         
##  [87] "cbcl_scr_syn_withdep_t.1.year"        
##  [88] "cbcl_scr_syn_attention_t.1.year"      
##  [89] "cbcl_scr_syn_rulebreak_t.1.year"      
##  [90] "cbcl_scr_syn_aggressive_t.1.year"     
##  [91] "reshist_addr1_years.1.year"           
##  [92] "income_midp.1.year"                   
##  [93] "demo_comb_income_v2.1.year"           
##  [94] "reshist_addr1_no2_2016_aavg.1.year"   
##  [95] "reshist_addr1_o3_2016_annavg.1.year"  
##  [96] "reshist_addr1_pm252016aa.2.year"      
##  [97] "abcd_site.2.year"                     
##  [98] "interview_date.2.year"                
##  [99] "rel_relationship.2.year"              
## [100] "rel_ingroup_order.2.year"             
## [101] "high.educ.2.year"                     
## [102] "interview_age.2.year"                 
## [103] "demo_l_p_select_language___1.2.year"  
## [104] "cbcl_select_language___1.2.year"      
## [105] "reshist_addr1_adi_perc.2.year"        
## [106] "reshist_addr1_adi_wsum.2.year"        
## [107] "overall.income.b.2.year"              
## [108] "overall.income.l.2.year"              
## [109] "overall.income.alltp.2.year"          
## [110] "prnt.empl.bl.2.year"                  
## [111] "prnt.empl.l.2.year"                   
## [112] "prnt.empl.alltp.2.year"               
## [113] "neighb_phenx_avg_p.2.year"            
## [114] "neighb_phenx_sum_p.2.year"            
## [115] "reshist_addr1_popdensity.2.year"      
## [116] "reshist_addr1_proxrd.2.year"          
## [117] "married.2.year"                       
## [118] "married.or.livingtogether.2.year"     
## [119] "cbcl_scr_syn_internal_r.2.year"       
## [120] "cbcl_scr_syn_external_r.2.year"       
## [121] "cbcl_scr_syn_totprob_r.2.year"        
## [122] "cbcl_scr_syn_anxdep_r.2.year"         
## [123] "cbcl_scr_syn_withdep_r.2.year"        
## [124] "cbcl_scr_syn_attention_r.2.year"      
## [125] "cbcl_scr_syn_rulebreak_r.2.year"      
## [126] "cbcl_scr_syn_aggressive_r.2.year"     
## [127] "cbcl_scr_syn_internal_t.2.year"       
## [128] "cbcl_scr_syn_external_t.2.year"       
## [129] "cbcl_scr_syn_totprob_t.2.year"        
## [130] "cbcl_scr_syn_anxdep_t.2.year"         
## [131] "cbcl_scr_syn_withdep_t.2.year"        
## [132] "cbcl_scr_syn_attention_t.2.year"      
## [133] "cbcl_scr_syn_rulebreak_t.2.year"      
## [134] "cbcl_scr_syn_aggressive_t.2.year"     
## [135] "reshist_addr1_years.2.year"           
## [136] "income_midp.2.year"                   
## [137] "demo_comb_income_v2.2.year"           
## [138] "reshist_addr1_no2_2016_aavg.2.year"   
## [139] "reshist_addr1_o3_2016_annavg.2.year"  
## [140] "rel_relationship.1_year"
names(HEI_Aim2_Long_1KidPerFamily)
##  [1] "X"                               "subjectid"                      
##  [3] "rel_family_id"                   "abcd_site"                      
##  [5] "eventname"                       "rel_relationship"               
##  [7] "interview_date"                  "demo_l_p_select_language___1"   
##  [9] "cbcl_select_language___1"        "rel_group_id"                   
## [11] "rel_ingroup_order"               "rel_same_sex"                   
## [13] "reshist_addr1_pm252016aa"        "sex"                            
## [15] "interview_age"                   "race_ethnicity"                 
## [17] "high.educ"                       "reshist_addr1_adi_perc"         
## [19] "reshist_addr1_adi_wsum"          "overall.income.b"               
## [21] "overall.income.l"                "overall.income.alltp"           
## [23] "prnt.empl.bl"                    "prnt.empl.l"                    
## [25] "prnt.empl.alltp"                 "neighb_phenx_avg_p"             
## [27] "neighb_phenx_sum_p"              "reshist_addr1_popdensity"       
## [29] "reshist_addr1_proxrd"            "married"                        
## [31] "married.or.livingtogether"       "cbcl_scr_syn_internal_r"        
## [33] "cbcl_scr_syn_external_r"         "cbcl_scr_syn_totprob_r"         
## [35] "cbcl_scr_syn_anxdep_r"           "cbcl_scr_syn_withdep_r"         
## [37] "cbcl_scr_syn_attention_r"        "cbcl_scr_syn_rulebreak_r"       
## [39] "cbcl_scr_syn_aggressive_r"       "cbcl_scr_syn_internal_t"        
## [41] "cbcl_scr_syn_external_t"         "cbcl_scr_syn_totprob_t"         
## [43] "cbcl_scr_syn_anxdep_t"           "cbcl_scr_syn_withdep_t"         
## [45] "cbcl_scr_syn_attention_t"        "cbcl_scr_syn_rulebreak_t"       
## [47] "cbcl_scr_syn_aggressive_t"       "reshist_addr1_years"            
## [49] "income_midp"                     "demo_comb_income_v2"            
## [51] "reshist_addr1_no2_2016_aavg"     "reshist_addr1_o3_2016_annavg"   
## [53] "reshist_addr1_pm252016aa_bl"     "reshist_addr1_no2_2016_aavg_bl" 
## [55] "reshist_addr1_o3_2016_annavg_bl" "high.educ_bl"
# Note we are keeping all families but choosing one kid per family
length(unique(HEI_Aim2_Long$rel_family_id))
## [1] 9844
length(unique(HEI_Aim2_Long$subjectid)) # matches number of rows of wide data :)
## [1] 11866
length(unique(HEI_Aim2_Long_1KidPerFamily$rel_family_id))
## [1] 9844
length(unique(HEI_Aim2_Long_1KidPerFamily$subjectid))
## [1] 9844

Copy baseline variables to all timepoints for full df

#rename so can use later
names(HEI_Aim2_Long)[names(HEI_Aim2_Long) == 'prnt.empl.bl'] <- 'prnt.empl.b'

#create dataset for table and comparison
baseline_vars <- subset(HEI_Aim2_Long, HEI_Aim2_Long$eventname=="Baseline", select = c("subjectid", "sex", "race_ethnicity", "high.educ", "neighb_phenx_avg_p", "overall.income.b", "prnt.empl.b"))

#rename variables
names(baseline_vars)[names(baseline_vars) == 'sex'] <- 'sex.bl'
names(baseline_vars)[names(baseline_vars) == 'race_ethnicity'] <- 'race_ethnicity.bl'
names(baseline_vars)[names(baseline_vars) == 'high.educ'] <- 'high.educ.bl'
names(baseline_vars)[names(baseline_vars) == 'neighb_phenx_avg_p'] <- 'neighb_phenx_avg_p.bl'
names(baseline_vars)[names(baseline_vars) == 'overall.income.b'] <- 'overall.income.bl'
names(baseline_vars)[names(baseline_vars) == 'prnt.empl.b'] <- 'prnt.empl.bl'

#add to initial df
HEI_Aim2_Long_2 <- merge(HEI_Aim2_Long, baseline_vars, by="subjectid")

Descriptive Table before Cleaning

#factor eventname
HEI_Aim2_Long_2$eventname <- as.factor(HEI_Aim2_Long_2$eventname)
HEI_Aim2_Long_2$eventname <- relevel(HEI_Aim2_Long_2$eventname , ref="Baseline")

#create smaller df
df_prior <- subset(HEI_Aim2_Long_2,select=c("subjectid","abcd_site","eventname","interview_age","reshist_addr1_pm252016aa_bl","prnt.empl.bl","overall.income.bl","sex.bl","race_ethnicity.bl","high.educ.bl","neighb_phenx_avg_p.bl","cbcl_scr_syn_internal_r","cbcl_scr_syn_external_r","cbcl_scr_syn_anxdep_r","cbcl_scr_syn_withdep_r","cbcl_scr_syn_attention_r","cbcl_scr_syn_rulebreak_r","cbcl_scr_syn_aggressive_r","cbcl_scr_syn_totprob_r"))
#create table
des_table_prior <- tableby(eventname ~ ., data = df_prior[ , -which(names(df_prior) %in% c("subjectid"))], total=F) 
summary(des_table_prior, title = "Descriptive Statistics by Eventname Before Cleaning")
## 
## 
## Table: Descriptive Statistics by Eventname Before Cleaning
## 
## |                                         | Baseline (N=11839) | 1-year (N=11200)  |  2-year (N=7334)  | p value|
## |:----------------------------------------|:------------------:|:-----------------:|:-----------------:|-------:|
## |**abcd_site**                            |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;site01                 |     406 (3.4%)     |    369 (3.3%)     |    210 (2.9%)     |        |
## |&nbsp;&nbsp;&nbsp;site02                 |     558 (4.7%)     |    548 (4.9%)     |    351 (4.8%)     |        |
## |&nbsp;&nbsp;&nbsp;site03                 |     631 (5.3%)     |    563 (5.0%)     |    372 (5.1%)     |        |
## |&nbsp;&nbsp;&nbsp;site04                 |     745 (6.3%)     |    727 (6.5%)     |    534 (7.3%)     |        |
## |&nbsp;&nbsp;&nbsp;site05                 |     378 (3.2%)     |    357 (3.2%)     |    234 (3.2%)     |        |
## |&nbsp;&nbsp;&nbsp;site06                 |     584 (4.9%)     |    568 (5.1%)     |    379 (5.2%)     |        |
## |&nbsp;&nbsp;&nbsp;site07                 |     339 (2.9%)     |    322 (2.9%)     |    116 (1.6%)     |        |
## |&nbsp;&nbsp;&nbsp;site08                 |     350 (3.0%)     |    339 (3.0%)     |    212 (2.9%)     |        |
## |&nbsp;&nbsp;&nbsp;site09                 |     433 (3.7%)     |    393 (3.5%)     |    227 (3.1%)     |        |
## |&nbsp;&nbsp;&nbsp;site10                 |     739 (6.2%)     |    708 (6.3%)     |    493 (6.7%)     |        |
## |&nbsp;&nbsp;&nbsp;site11                 |     450 (3.8%)     |    400 (3.6%)     |    197 (2.7%)     |        |
## |&nbsp;&nbsp;&nbsp;site12                 |     604 (5.1%)     |    550 (4.9%)     |    274 (3.7%)     |        |
## |&nbsp;&nbsp;&nbsp;site13                 |     728 (6.1%)     |    691 (6.2%)     |    443 (6.0%)     |        |
## |&nbsp;&nbsp;&nbsp;site14                 |     606 (5.1%)     |    583 (5.2%)     |    430 (5.9%)     |        |
## |&nbsp;&nbsp;&nbsp;site15                 |     458 (3.9%)     |    426 (3.8%)     |    266 (3.6%)     |        |
## |&nbsp;&nbsp;&nbsp;site16                 |    1011 (8.5%)     |    979 (8.7%)     |    640 (8.7%)     |        |
## |&nbsp;&nbsp;&nbsp;site17                 |     578 (4.9%)     |    562 (5.0%)     |    374 (5.1%)     |        |
## |&nbsp;&nbsp;&nbsp;site18                 |     384 (3.2%)     |    376 (3.4%)     |    223 (3.0%)     |        |
## |&nbsp;&nbsp;&nbsp;site19                 |     550 (4.6%)     |    521 (4.7%)     |    397 (5.4%)     |        |
## |&nbsp;&nbsp;&nbsp;site20                 |     707 (6.0%)     |    687 (6.1%)     |    528 (7.2%)     |        |
## |&nbsp;&nbsp;&nbsp;site21                 |     600 (5.1%)     |    531 (4.7%)     |    434 (5.9%)     |        |
## |**interview_age**                        |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |  118.967 (7.495)   |  131.073 (7.714)  |  143.361 (7.747)  |        |
## |&nbsp;&nbsp;&nbsp;Range                  | 107.000 - 133.000  | 116.000 - 149.000 | 127.000 - 164.000 |        |
## |**reshist_addr1_pm252016aa_bl**          |                    |                   |                   |   0.745|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |        651         |        587        |        224        |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |   7.663 (1.563)    |   7.648 (1.561)   |   7.650 (1.535)   |        |
## |&nbsp;&nbsp;&nbsp;Range                  |   1.722 - 15.902   |  1.722 - 15.902   |  1.722 - 15.902   |        |
## |**prnt.empl.bl**                         |                    |                   |                   |   0.098|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         56         |        47         |        22         |        |
## |&nbsp;&nbsp;&nbsp;Employed               |    8194 (69.5%)    |   7826 (70.2%)    |   5214 (71.3%)    |        |
## |&nbsp;&nbsp;&nbsp;Other                  |     855 (7.3%)     |    791 (7.1%)     |    474 (6.5%)     |        |
## |&nbsp;&nbsp;&nbsp;Stay at Home Parent    |    2065 (17.5%)    |   1941 (17.4%)    |   1262 (17.3%)    |        |
## |&nbsp;&nbsp;&nbsp;Unemployed             |     669 (5.7%)     |    595 (5.3%)     |    362 (5.0%)     |        |
## |**overall.income.bl**                    |                    |                   |                   |   0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         2          |         1         |         0         |        |
## |&nbsp;&nbsp;&nbsp;[<50k]                 |    3215 (27.2%)    |   2930 (26.2%)    |   1833 (25.0%)    |        |
## |&nbsp;&nbsp;&nbsp;[>=100K]               |    4544 (38.4%)    |   4419 (39.5%)    |   2952 (40.3%)    |        |
## |&nbsp;&nbsp;&nbsp;[>=50K & <100K]        |    3065 (25.9%)    |   2937 (26.2%)    |   2000 (27.3%)    |        |
## |&nbsp;&nbsp;&nbsp;[Don't Know or Refuse] |    1013 (8.6%)     |    913 (8.2%)     |    549 (7.5%)     |        |
## |**sex.bl**                               |                    |                   |                   |   0.906|
## |&nbsp;&nbsp;&nbsp;Female                 |    5658 (47.8%)    |   5335 (47.6%)    |   3481 (47.5%)    |        |
## |&nbsp;&nbsp;&nbsp;Male                   |    6181 (52.2%)    |   5865 (52.4%)    |   3853 (52.5%)    |        |
## |**race_ethnicity.bl**                    |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         2          |         2         |         0         |        |
## |&nbsp;&nbsp;&nbsp;Asian                  |     250 (2.1%)     |    239 (2.1%)     |    158 (2.2%)     |        |
## |&nbsp;&nbsp;&nbsp;Black                  |    1777 (15.0%)    |   1594 (14.2%)    |    874 (11.9%)    |        |
## |&nbsp;&nbsp;&nbsp;Hispanic               |    2405 (20.3%)    |   2220 (19.8%)    |   1411 (19.2%)    |        |
## |&nbsp;&nbsp;&nbsp;Other                  |    1243 (10.5%)    |   1171 (10.5%)    |    724 (9.9%)     |        |
## |&nbsp;&nbsp;&nbsp;White                  |    6162 (52.1%)    |   5974 (53.3%)    |   4167 (56.8%)    |        |
## |**high.educ.bl**                         |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         14         |        12         |        10         |        |
## |&nbsp;&nbsp;&nbsp;< HS Diploma           |     592 (5.0%)     |    526 (4.7%)     |    306 (4.2%)     |        |
## |&nbsp;&nbsp;&nbsp;Bachelor               |    3006 (25.4%)    |   2889 (25.8%)    |   2002 (27.3%)    |        |
## |&nbsp;&nbsp;&nbsp;HS Diploma/GED         |    1129 (9.5%)     |    1007 (9.0%)    |    568 (7.8%)     |        |
## |&nbsp;&nbsp;&nbsp;Post Graduate Degree   |    4025 (34.0%)    |   3919 (35.0%)    |   2611 (35.6%)    |        |
## |&nbsp;&nbsp;&nbsp;Some College           |    3073 (26.0%)    |   2847 (25.4%)    |   1837 (25.1%)    |        |
## |**neighb_phenx_avg_p.bl**                |                    |                   |                   |   0.003|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         8          |         5         |         3         |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |   3.890 (0.975)    |   3.903 (0.969)   |   3.938 (0.942)   |        |
## |&nbsp;&nbsp;&nbsp;Range                  |   1.000 - 5.000    |   1.000 - 5.000   |   1.000 - 5.000   |        |
## |**cbcl_scr_syn_internal_r**              |                    |                   |                   |   0.100|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         8          |        18         |         5         |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |   5.043 (5.522)    |   5.108 (5.551)   |   4.930 (5.614)   |        |
## |&nbsp;&nbsp;&nbsp;Range                  |   0.000 - 51.000   |  0.000 - 48.000   |  0.000 - 50.000   |        |
## |**cbcl_scr_syn_external_r**              |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         8          |        18         |         5         |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |   4.455 (5.867)    |   4.176 (5.656)   |   3.918 (5.479)   |        |
## |&nbsp;&nbsp;&nbsp;Range                  |   0.000 - 49.000   |  0.000 - 47.000   |  0.000 - 46.000   |        |
## |**cbcl_scr_syn_anxdep_r**                |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         8          |        18         |         5         |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |   2.516 (3.062)    |   2.540 (3.072)   |   2.322 (2.971)   |        |
## |&nbsp;&nbsp;&nbsp;Range                  |   0.000 - 26.000   |  0.000 - 22.000   |  0.000 - 22.000   |        |
## |**cbcl_scr_syn_withdep_r**               |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         8          |        18         |         5         |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |   1.034 (1.709)    |   1.116 (1.778)   |   1.201 (1.901)   |        |
## |&nbsp;&nbsp;&nbsp;Range                  |   0.000 - 15.000   |  0.000 - 14.000   |  0.000 - 16.000   |        |
## |**cbcl_scr_syn_attention_r**             |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         8          |        18         |         5         |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |   2.977 (3.495)    |   2.858 (3.431)   |   2.692 (3.298)   |        |
## |&nbsp;&nbsp;&nbsp;Range                  |   0.000 - 20.000   |  0.000 - 19.000   |  0.000 - 19.000   |        |
## |**cbcl_scr_syn_rulebreak_r**             |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         8          |        18         |         5         |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |   1.192 (1.861)    |   1.120 (1.822)   |   1.057 (1.833)   |        |
## |&nbsp;&nbsp;&nbsp;Range                  |   0.000 - 20.000   |  0.000 - 20.000   |  0.000 - 23.000   |        |
## |**cbcl_scr_syn_aggressive_r**            |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         8          |        18         |         5         |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |   3.262 (4.355)    |   3.056 (4.185)   |   2.861 (3.990)   |        |
## |&nbsp;&nbsp;&nbsp;Range                  |   0.000 - 36.000   |  0.000 - 33.000   |  0.000 - 32.000   |        |
## |**cbcl_scr_syn_totprob_r**               |                    |                   |                   | < 0.001|
## |&nbsp;&nbsp;&nbsp;N-Miss                 |         8          |        18         |         5         |        |
## |&nbsp;&nbsp;&nbsp;Mean (SD)              |  18.178 (17.968)   |  17.520 (17.567)  |  16.388 (17.001)  |        |
## |&nbsp;&nbsp;&nbsp;Range                  |  0.000 - 139.000   |  0.000 - 128.000  |  0.000 - 161.000  |        |

Creating variables for modeling and tables

The following variables are time-invariant, will use baseline covariates since PM2.5 collected at baseline: - reshist_addr1_pm252016aa_bl which is the Baseline PM2.5. - reshist_addr1_no2_2016_aavg_bl which is the Baseline NO2. - sex.bl - race_ethnicity.bl - high.educ.bl - prnt.empl.bl - neighb_phenx_avg_p.bl - overall.income.bl

The following variables are time-varying: - all CBCL outcomes - interview_age

Copy baseline variables to all timepoints for 1KidPerFamily

#rename so can use later
names(HEI_Aim2_Long_1KidPerFamily)[names(HEI_Aim2_Long_1KidPerFamily) == 'prnt.empl.bl'] <- 'prnt.empl.b'

#create dataset for table and comparison
baseline_vars_1KidPerFamily <- subset(HEI_Aim2_Long_1KidPerFamily, HEI_Aim2_Long_1KidPerFamily$eventname=="Baseline", select = c("subjectid", "sex", "race_ethnicity", "high.educ", "neighb_phenx_avg_p", "overall.income.b"))

#rename variables
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'sex'] <- 'sex.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'race_ethnicity'] <- 'race_ethnicity.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'high.educ'] <- 'high.educ.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'neighb_phenx_avg_p'] <- 'neighb_phenx_avg_p.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'overall.income.b'] <- 'overall.income.bl'
names(baseline_vars_1KidPerFamily)[names(baseline_vars_1KidPerFamily) == 'prnt.empl.b'] <- 'prnt.empl.bl'

#add to initial df
HEI_Aim2_Long_1KidPerFamily_2 <- merge(HEI_Aim2_Long_1KidPerFamily, baseline_vars, by="subjectid")

Clean 1KidPerFamily

## Cleaning
#merge Asian into Other group b/c statistically Asian group is too small
tapply(HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl, 
       HEI_Aim2_Long_1KidPerFamily_2$eventname,table, useNA = "always")
## $`1-year`
## 
##    Asian    Black Hispanic    Other    White     <NA> 
##      217     1334     1932      964     4803        1 
## 
## $`2-year`
## 
##    Asian    Black Hispanic    Other    White     <NA> 
##      141      715     1232      597     3326        0 
## 
## $Baseline
## 
##    Asian    Black Hispanic    Other    White     <NA> 
##      228     1496     2101     1029     4963        1
HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl <- 
  ifelse(HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl=="Asian","Other",
         HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl)

tapply(HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl, 
       HEI_Aim2_Long_1KidPerFamily_2$eventname,table, useNA = "always")
## $`1-year`
## 
##    Black Hispanic    Other    White     <NA> 
##     1334     1932     1181     4803        1 
## 
## $`2-year`
## 
##    Black Hispanic    Other    White     <NA> 
##      715     1232      738     3326        0 
## 
## $Baseline
## 
##    Black Hispanic    Other    White     <NA> 
##     1496     2101     1257     4963        1
#reformat variables
HEI_Aim2_Long_1KidPerFamily_2$eventname <- 
  as.factor(HEI_Aim2_Long_1KidPerFamily_2$eventname)

HEI_Aim2_Long_1KidPerFamily_2$eventname <- 
  relevel(HEI_Aim2_Long_1KidPerFamily_2$eventname , ref="Baseline")

table(HEI_Aim2_Long_1KidPerFamily_2$eventname, useNA = "always")
## 
## Baseline   1-year   2-year     <NA> 
##     9818     9251     6011        0
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_internal_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_internal_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_external_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_external_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_anxdep_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_anxdep_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_withdep_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_withdep_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_attention_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_attention_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_rulebreak_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_rulebreak_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_aggressive_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_aggressive_r)
HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_totprob_r <- as.numeric(HEI_Aim2_Long_1KidPerFamily_2$cbcl_scr_syn_totprob_r)
HEI_Aim2_Long_1KidPerFamily_2$abcd_site <- as.factor(HEI_Aim2_Long_1KidPerFamily_2$abcd_site)
HEI_Aim2_Long_1KidPerFamily_2$subjectid <- as.factor(HEI_Aim2_Long_1KidPerFamily_2$subjectid)
HEI_Aim2_Long_1KidPerFamily_2$prnt.empl.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$prnt.empl.bl, levels = c("Employed", "Stay at Home Parent", "Unemployed", "Other"))
HEI_Aim2_Long_1KidPerFamily_2$overall.income.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$overall.income.bl, levels = c("[>=100K]", "[>=50K & <100K]", "[<50k]", "[Don't Know or Refuse]"))
HEI_Aim2_Long_1KidPerFamily_2$sex.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$sex.bl, levels = c("Male", "Female"))
HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$race_ethnicity.bl, levels = c("White", "Hispanic", "Black", "Other"))
HEI_Aim2_Long_1KidPerFamily_2$high.educ.bl <- factor(HEI_Aim2_Long_1KidPerFamily_2$high.educ.bl, levels = c("Post Graduate Degree", "Bachelor", "Some College", "HS Diploma/GED", "< HS Diploma"))

Create Final Dataset

#create smaller df
df <- subset(HEI_Aim2_Long_1KidPerFamily_2,select=c("subjectid","abcd_site","eventname","interview_age","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl","prnt.empl.bl","overall.income.bl","sex.bl","race_ethnicity.bl","high.educ.bl","neighb_phenx_avg_p.bl","cbcl_scr_syn_internal_r","cbcl_scr_syn_external_r","cbcl_scr_syn_anxdep_r","cbcl_scr_syn_withdep_r","cbcl_scr_syn_attention_r","cbcl_scr_syn_rulebreak_r","cbcl_scr_syn_aggressive_r","cbcl_scr_syn_totprob_r"))

#complete cases because needed for zinb
df_cc <- df[complete.cases(df),]

#percentage of subjects lost with complete.cases
lost_sub <- data.frame(table(df$eventname))
colnames(lost_sub) <- c("eventname","abcd")
interim <- data.frame(table(df_cc$eventname))[2]
lost_sub$sample <- interim$Freq
lost_sub$diff <- lost_sub$abcd - lost_sub$sample
lost_sub$percent <- lost_sub$diff/lost_sub$abcd

#center age at 9years-old (i.e., 108 months)
df_cc$interview_age.c9 <- df_cc$interview_age-108
#change to years
df_cc$interview_age.c9.y <- df_cc$interview_age.c9/12
#center pm2.5 to 5 (recommended by WHO)
df_cc$reshist_addr1_pm252016aa_bl.c5 <- df_cc$reshist_addr1_pm252016aa_bl-5
#center no2 to 5.33 (recommended by WHO)
df_cc$reshist_addr1_no2_2016_aavg_bl.c533 <- df_cc$reshist_addr1_no2_2016_aavg_bl-5.33

#center around mean
neighb_phenx_avg_p.bl.cm <- df_cc$neighb_phenx_avg_p.bl - mean(df_cc$neighb_phenx_avg_p.bl)

#create table
des_table <- tableby(eventname ~ ., data = df_cc[ , -which(names(df_cc) %in% c("subjectid"))], total=F) 
summary(des_table, title = "Descriptive Statistics by Eventname After Cleaning")
Descriptive Statistics by Eventname After Cleaning
Baseline (N=9271) 1-year (N=8759) 2-year (N=5827) p value
abcd_site < 0.001
   site01 323 (3.5%) 299 (3.4%) 184 (3.2%)
   site02 305 (3.3%) 298 (3.4%) 210 (3.6%)
   site03 556 (6.0%) 493 (5.6%) 330 (5.7%)
   site04 631 (6.8%) 615 (7.0%) 451 (7.7%)
   site05 322 (3.5%) 305 (3.5%) 203 (3.5%)
   site06 509 (5.5%) 495 (5.7%) 334 (5.7%)
   site07 290 (3.1%) 275 (3.1%) 102 (1.8%)
   site08 302 (3.3%) 289 (3.3%) 183 (3.1%)
   site09 404 (4.4%) 366 (4.2%) 214 (3.7%)
   site10 623 (6.7%) 592 (6.8%) 427 (7.3%)
   site11 382 (4.1%) 339 (3.9%) 166 (2.8%)
   site12 481 (5.2%) 438 (5.0%) 236 (4.1%)
   site13 617 (6.7%) 585 (6.7%) 387 (6.6%)
   site14 359 (3.9%) 344 (3.9%) 255 (4.4%)
   site15 398 (4.3%) 371 (4.2%) 232 (4.0%)
   site16 794 (8.6%) 769 (8.8%) 500 (8.6%)
   site17 509 (5.5%) 495 (5.7%) 336 (5.8%)
   site18 351 (3.8%) 344 (3.9%) 206 (3.5%)
   site19 216 (2.3%) 208 (2.4%) 185 (3.2%)
   site20 447 (4.8%) 433 (4.9%) 327 (5.6%)
   site21 452 (4.9%) 406 (4.6%) 359 (6.2%)
interview_age < 0.001
   Mean (SD) 118.856 (7.411) 130.924 (7.617) 143.081 (7.635)
   Range 107.000 - 133.000 117.000 - 149.000 127.000 - 164.000
reshist_addr1_pm252016aa_bl 0.771
   Mean (SD) 7.706 (1.571) 7.693 (1.571) 7.690 (1.552)
   Range 1.722 - 15.902 1.722 - 15.902 1.722 - 15.902
reshist_addr1_no2_2016_aavg_bl 0.972
   Mean (SD) 18.595 (5.571) 18.579 (5.567) 18.575 (5.573)
   Range 0.729 - 37.940 0.729 - 37.940 0.729 - 35.346
prnt.empl.bl 0.373
   Employed 6444 (69.5%) 6146 (70.2%) 4139 (71.0%)
   Stay at Home Parent 1612 (17.4%) 1516 (17.3%) 1002 (17.2%)
   Unemployed 539 (5.8%) 481 (5.5%) 299 (5.1%)
   Other 676 (7.3%) 616 (7.0%) 387 (6.6%)
overall.income.bl 0.018
   [>=100K] 3514 (37.9%) 3418 (39.0%) 2307 (39.6%)
   [>=50K & <100K] 2419 (26.1%) 2312 (26.4%) 1597 (27.4%)
   [<50k] 2564 (27.7%) 2335 (26.7%) 1494 (25.6%)
   [Don’t Know or Refuse] 774 (8.3%) 694 (7.9%) 429 (7.4%)
sex.bl 0.861
   Male 4858 (52.4%) 4605 (52.6%) 3080 (52.9%)
   Female 4413 (47.6%) 4154 (47.4%) 2747 (47.1%)
race_ethnicity.bl < 0.001
   White 4759 (51.3%) 4612 (52.7%) 3236 (55.5%)
   Hispanic 1958 (21.1%) 1800 (20.6%) 1198 (20.6%)
   Black 1363 (14.7%) 1221 (13.9%) 678 (11.6%)
   Other 1191 (12.8%) 1126 (12.9%) 715 (12.3%)
high.educ.bl 0.002
   Post Graduate Degree 3179 (34.3%) 3090 (35.3%) 2093 (35.9%)
   Bachelor 2310 (24.9%) 2217 (25.3%) 1551 (26.6%)
   Some College 2423 (26.1%) 2247 (25.7%) 1466 (25.2%)
   HS Diploma/GED 899 (9.7%) 800 (9.1%) 460 (7.9%)
   < HS Diploma 460 (5.0%) 405 (4.6%) 257 (4.4%)
neighb_phenx_avg_p.bl 0.034
   Mean (SD) 3.873 (0.976) 3.884 (0.971) 3.915 (0.948)
   Range 1.000 - 5.000 1.000 - 5.000 1.000 - 5.000
cbcl_scr_syn_internal_r 0.042
   Mean (SD) 5.154 (5.539) 5.281 (5.612) 5.047 (5.610)
   Range 0.000 - 51.000 0.000 - 48.000 0.000 - 50.000
cbcl_scr_syn_external_r < 0.001
   Mean (SD) 4.484 (5.798) 4.232 (5.622) 3.949 (5.368)
   Range 0.000 - 49.000 0.000 - 46.000 0.000 - 46.000
cbcl_scr_syn_anxdep_r < 0.001
   Mean (SD) 2.569 (3.060) 2.613 (3.091) 2.352 (2.941)
   Range 0.000 - 26.000 0.000 - 22.000 0.000 - 22.000
cbcl_scr_syn_withdep_r < 0.001
   Mean (SD) 1.045 (1.700) 1.151 (1.800) 1.235 (1.912)
   Range 0.000 - 14.000 0.000 - 14.000 0.000 - 16.000
cbcl_scr_syn_attention_r < 0.001
   Mean (SD) 3.042 (3.508) 2.941 (3.458) 2.782 (3.324)
   Range 0.000 - 19.000 0.000 - 19.000 0.000 - 19.000
cbcl_scr_syn_rulebreak_r < 0.001
   Mean (SD) 1.204 (1.844) 1.142 (1.823) 1.068 (1.829)
   Range 0.000 - 20.000 0.000 - 20.000 0.000 - 23.000
cbcl_scr_syn_aggressive_r < 0.001
   Mean (SD) 3.280 (4.305) 3.090 (4.154) 2.881 (3.894)
   Range 0.000 - 36.000 0.000 - 33.000 0.000 - 32.000
cbcl_scr_syn_totprob_r < 0.001
   Mean (SD) 18.519 (17.931) 17.986 (17.659) 16.755 (16.864)
   Range 0.000 - 139.000 0.000 - 128.000 0.000 - 161.000
interview_age.c9 < 0.001
   Mean (SD) 10.856 (7.411) 22.924 (7.617) 35.081 (7.635)
   Range -1.000 - 25.000 9.000 - 41.000 19.000 - 56.000
interview_age.c9.y < 0.001
   Mean (SD) 0.905 (0.618) 1.910 (0.635) 2.923 (0.636)
   Range -0.083 - 2.083 0.750 - 3.417 1.583 - 4.667
reshist_addr1_pm252016aa_bl.c5 0.771
   Mean (SD) 2.706 (1.571) 2.693 (1.571) 2.690 (1.552)
   Range -3.278 - 10.902 -3.278 - 10.902 -3.278 - 10.902
reshist_addr1_no2_2016_aavg_bl.c533 0.972
   Mean (SD) 13.265 (5.571) 13.249 (5.567) 13.245 (5.573)
   Range -4.601 - 32.610 -4.601 - 32.610 -4.601 - 30.016
Final_DF_Descriptives <- summary(des_table, title = "Descriptive Statistics by Eventname After Cleaning")
#write.csv(Final_DF_Descriptives, "Descriptives_Final_Dataset.csv")

Create Comparison b/t Full and Final Datasets

# Using the following two CBCL data as full and final analytic sample
Full_Data <- 
  df_prior[,c("subjectid","abcd_site","eventname","interview_age","prnt.empl.bl","overall.income.bl","sex.bl","race_ethnicity.bl","high.educ.bl","neighb_phenx_avg_p.bl")]

Analysis_Data <- 
  df_cc[,c("subjectid","abcd_site","eventname","interview_age","prnt.empl.bl","overall.income.bl","sex.bl","race_ethnicity.bl","high.educ.bl","neighb_phenx_avg_p.bl")]

str(Full_Data)

‘data.frame’: 30373 obs. of 10 variables: $ subjectid : chr “NDAR_INV003RTV85” “NDAR_INV003RTV85” “NDAR_INV005V6D2C” “NDAR_INV005V6D2C” … $ abcd_site : chr “site06” “site06” “site10” “site10” … $ eventname : Factor w/ 3 levels “Baseline”,“1-year”,..: 2 1 2 1 2 1 2 3 1 2 … $ interview_age : int 143 131 130 121 123 112 142 152 130 138 … $ prnt.empl.bl : chr “Employed” “Employed” “Stay at Home Parent” “Stay at Home Parent” … $ overall.income.bl : chr “[>=50K & <100K]” “[>=50K & <100K]” “[Don’t Know or Refuse]” “[Don’t Know or Refuse]” … $ sex.bl : chr “Female” “Female” “Female” “Female” … $ race_ethnicity.bl : chr “White” “White” “Hispanic” “Hispanic” … $ high.educ.bl : chr “HS Diploma/GED” “HS Diploma/GED” “< HS Diploma” “< HS Diploma” … $ neighb_phenx_avg_p.bl: num 5 5 4.33 4.33 5 …

str(Analysis_Data)

‘data.frame’: 23857 obs. of 10 variables: $ subjectid : Factor w/ 9818 levels “NDAR_INV003RTV85”,..: 2 2 3 3 4 4 4 5 5 5 … $ abcd_site : Factor w/ 21 levels “site01”,“site02”,..: 10 10 7 7 20 20 20 12 12 12 … $ eventname : Factor w/ 3 levels “Baseline”,“1-year”,..: 2 1 2 1 2 3 1 2 3 1 … $ interview_age : int 130 121 123 112 142 152 130 138 149 124 … $ prnt.empl.bl : Factor w/ 4 levels “Employed”,“Stay at Home Parent”,..: 2 2 1 1 1 1 1 1 1 1 … $ overall.income.bl : Factor w/ 4 levels “[>=100K]”,“[>=50K & <100K]”,..: 4 4 1 1 3 3 3 4 4 4 … $ sex.bl : Factor w/ 2 levels “Male”,“Female”: 2 2 1 1 1 1 1 1 1 1 … $ race_ethnicity.bl : Factor w/ 4 levels “White”,“Hispanic”,..: 2 2 1 1 1 1 1 3 3 3 … $ high.educ.bl : Factor w/ 5 levels “Post Graduate Degree”,..: 5 5 1 1 3 3 3 4 4 4 … $ neighb_phenx_avg_p.bl: num 4.33 4.33 5 5 3.67 …

Full_Data$label = "Full_data"
Analysis_Data$label = "Analysis_data"

# Merging the two data set to be used in arsenal package

CBCL_Combined_Data <- rbind(Full_Data, Analysis_Data)
names(CBCL_Combined_Data)

[1] “subjectid” “abcd_site” “eventname”
[4] “interview_age” “prnt.empl.bl” “overall.income.bl”
[7] “sex.bl” “race_ethnicity.bl” “high.educ.bl”
[10] “neighb_phenx_avg_p.bl” “label”

dim(CBCL_Combined_Data)

[1] 54230 11

dim(Full_Data)[1] + dim(Analysis_Data)[1]

[1] 54230

# Baseline comparison--------------------------------------------------------
Combined_Data_rbl_Baseline <-  
  tableby(label ~ sex.bl + interview_age +
  race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl +
  overall.income.bl, 
  subset= (eventname=="Baseline"),
  data = CBCL_Combined_Data, total=F)

Combined_Data_Compare_Baseline <- summary(Combined_Data_rbl_Baseline)
Combined_Data_Compare_Baseline
Analysis_data (N=9271) Full_data (N=11839) p value
sex.bl 0.783
   Female 4413 (47.6%) 5658 (47.8%)
   Male 4858 (52.4%) 6181 (52.2%)
interview_age 0.283
   Mean (SD) 118.856 (7.411) 118.967 (7.495)
   Range 107.000 - 133.000 107.000 - 133.000
race_ethnicity.bl < 0.001
   N-Miss 0 2
   Asian 0 (0.0%) 250 (2.1%)
   Black 1363 (14.7%) 1777 (15.0%)
   Hispanic 1958 (21.1%) 2405 (20.3%)
   Other 1191 (12.8%) 1243 (10.5%)
   White 4759 (51.3%) 6162 (52.1%)
high.educ.bl 0.938
   N-Miss 0 14
   < HS Diploma 460 (5.0%) 592 (5.0%)
   Bachelor 2310 (24.9%) 3006 (25.4%)
   HS Diploma/GED 899 (9.7%) 1129 (9.5%)
   Post Graduate Degree 3179 (34.3%) 4025 (34.0%)
   Some College 2423 (26.1%) 3073 (26.0%)
prnt.empl.bl 0.972
   N-Miss 0 56
   Employed 6444 (69.5%) 8194 (69.5%)
   Other 676 (7.3%) 855 (7.3%)
   Stay at Home Parent 1612 (17.4%) 2065 (17.5%)
   Unemployed 539 (5.8%) 669 (5.7%)
neighb_phenx_avg_p.bl 0.210
   N-Miss 0 8
   Mean (SD) 3.873 (0.976) 3.890 (0.975)
   Range 1.000 - 5.000 1.000 - 5.000
overall.income.bl 0.769
   N-Miss 0 2
   [<50k] 2564 (27.7%) 3215 (27.2%)
   [>=100K] 3514 (37.9%) 4544 (38.4%)
   [>=50K & <100K] 2419 (26.1%) 3065 (25.9%)
   [Don’t Know or Refuse] 774 (8.3%) 1013 (8.6%)
#write.csv(Combined_Data_Compare_Baseline, "Combined_Data_Compare_Baseline.csv")

## Sanity check to see above p-value matches regular chi-squared in R and it does!
chisq.test(CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$label, 
           CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$sex.bl,
           correct = FALSE)
Pearson's Chi-squared test

data: CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)label and CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)sex.bl X-squared = 0.076155, df = 1, p-value = 0.7826

chisq.test(CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$label, 
           CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$race_ethnicity.bl,
           correct = FALSE)
Pearson's Chi-squared test

data: CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)label and CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)race_ethnicity.bl X-squared = 223.09, df = 4, p-value < 2.2e-16

chisq.test(CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$label, 
           CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]$high.educ.bl,
           correct = FALSE)
Pearson's Chi-squared test

data: CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)label and CBCL_Combined_Data[which(CBCL_Combined_Data\(eventname == "Baseline"), ]\)high.educ.bl X-squared = 0.80586, df = 4, p-value = 0.9377

summary(aov(interview_age ~ label,
    data=CBCL_Combined_Data[which(CBCL_Combined_Data$eventname=="Baseline"),]))
           Df  Sum Sq Mean Sq F value Pr(>F)

label 1 64 64.07 1.152 0.283 Residuals 21108 1174261 55.63

#---------------------------------------------------------------------------------------
# One Year Comparison
Combined_Data_rbl_OneYear <-  
  tableby(label ~ sex.bl + interview_age +
  race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl +
  overall.income.bl, 
  subset= (eventname=="1-year"),
  data = CBCL_Combined_Data, total=F)

Combined_Data_Compare_OneYear <- summary(Combined_Data_rbl_OneYear)
Combined_Data_Compare_OneYear
Analysis_data (N=8759) Full_data (N=11200) p value
sex.bl 0.770
   Female 4154 (47.4%) 5335 (47.6%)
   Male 4605 (52.6%) 5865 (52.4%)
interview_age 0.173
   Mean (SD) 130.924 (7.617) 131.073 (7.714)
   Range 117.000 - 149.000 116.000 - 149.000
race_ethnicity.bl < 0.001
   N-Miss 0 2
   Asian 0 (0.0%) 239 (2.1%)
   Black 1221 (13.9%) 1594 (14.2%)
   Hispanic 1800 (20.6%) 2220 (19.8%)
   Other 1126 (12.9%) 1171 (10.5%)
   White 4612 (52.7%) 5974 (53.3%)
high.educ.bl 0.934
   N-Miss 0 12
   < HS Diploma 405 (4.6%) 526 (4.7%)
   Bachelor 2217 (25.3%) 2889 (25.8%)
   HS Diploma/GED 800 (9.1%) 1007 (9.0%)
   Post Graduate Degree 3090 (35.3%) 3919 (35.0%)
   Some College 2247 (25.7%) 2847 (25.4%)
prnt.empl.bl 0.965
   N-Miss 0 47
   Employed 6146 (70.2%) 7826 (70.2%)
   Other 616 (7.0%) 791 (7.1%)
   Stay at Home Parent 1516 (17.3%) 1941 (17.4%)
   Unemployed 481 (5.5%) 595 (5.3%)
neighb_phenx_avg_p.bl 0.185
   N-Miss 0 5
   Mean (SD) 3.884 (0.971) 3.903 (0.969)
   Range 1.000 - 5.000 1.000 - 5.000
overall.income.bl 0.784
   N-Miss 0 1
   [<50k] 2335 (26.7%) 2930 (26.2%)
   [>=100K] 3418 (39.0%) 4419 (39.5%)
   [>=50K & <100K] 2312 (26.4%) 2937 (26.2%)
   [Don’t Know or Refuse] 694 (7.9%) 913 (8.2%)
#write.csv(Combined_Data_Compare_OneYear,  "Combined_Data_Compare_OneYear.csv")

#---------------------------------------------------------------------------------------
# Two Year Comparison
Combined_Data_rbl_TwoYear <-  
  tableby(label ~ sex.bl + interview_age +
  race_ethnicity.bl + high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl +
  overall.income.bl, 
  subset= (eventname=="2-year"),
  data = CBCL_Combined_Data, total=F)

Combined_Data_Compare_TwoYear <- summary(Combined_Data_rbl_TwoYear)
Combined_Data_Compare_TwoYear
Analysis_data (N=5827) Full_data (N=7334) p value
sex.bl 0.714
   Female 2747 (47.1%) 3481 (47.5%)
   Male 3080 (52.9%) 3853 (52.5%)
interview_age 0.038
   Mean (SD) 143.081 (7.635) 143.361 (7.747)
   Range 127.000 - 164.000 127.000 - 164.000
race_ethnicity.bl < 0.001
   Asian 0 (0.0%) 158 (2.2%)
   Black 678 (11.6%) 874 (11.9%)
   Hispanic 1198 (20.6%) 1411 (19.2%)
   Other 715 (12.3%) 724 (9.9%)
   White 3236 (55.5%) 4167 (56.8%)
high.educ.bl 0.881
   N-Miss 0 10
   < HS Diploma 257 (4.4%) 306 (4.2%)
   Bachelor 1551 (26.6%) 2002 (27.3%)
   HS Diploma/GED 460 (7.9%) 568 (7.8%)
   Post Graduate Degree 2093 (35.9%) 2611 (35.6%)
   Some College 1466 (25.2%) 1837 (25.1%)
prnt.empl.bl 0.945
   N-Miss 0 22
   Employed 4139 (71.0%) 5214 (71.3%)
   Other 387 (6.6%) 474 (6.5%)
   Stay at Home Parent 1002 (17.2%) 1262 (17.3%)
   Unemployed 299 (5.1%) 362 (5.0%)
neighb_phenx_avg_p.bl 0.161
   N-Miss 0 3
   Mean (SD) 3.915 (0.948) 3.938 (0.942)
   Range 1.000 - 5.000 1.000 - 5.000
overall.income.bl 0.807
   [<50k] 1494 (25.6%) 1833 (25.0%)
   [>=100K] 2307 (39.6%) 2952 (40.3%)
   [>=50K & <100K] 1597 (27.4%) 2000 (27.3%)
   [Don’t Know or Refuse] 429 (7.4%) 549 (7.5%)
#write.csv(Combined_Data_Compare_TwoYear,  "Combined_Data_Compare_TwoYear.csv")

Code to Check Skewness

## Notes on how to check skewness
# If skewness is less than -1 or greater than 1, the distribution is highly skewed.
# If the kurtosis is greater than 3, then the data set has heavier tails than a normal
# distribution (more in the tails).  If the kurtosis is less than 3, then the 
# has lighter tails than a normal distribution (less in the tails). 
skew(df_cc$cbcl_scr_syn_internal_r)
## [1] 1.906468
kurtosis(df_cc$cbcl_scr_syn_internal_r)
## [1] 4.982213

Distribution of outcome variables by eventname

ggplot(df_cc,aes(cbcl_scr_syn_internal_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)

ggplot(df_cc,aes(cbcl_scr_syn_external_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)

ggplot(df_cc,aes(cbcl_scr_syn_anxdep_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)

ggplot(df_cc,aes(cbcl_scr_syn_withdep_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)

ggplot(df_cc,aes(cbcl_scr_syn_attention_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)

ggplot(df_cc,aes(cbcl_scr_syn_rulebreak_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)

ggplot(df_cc,aes(cbcl_scr_syn_aggressive_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)

ggplot(df_cc,aes(cbcl_scr_syn_totprob_r)) + geom_histogram(binwidth = 1) + facet_grid(.~eventname)

Check overdispersion for each outcome

#check variance/mean ratio for overdispersion (greater than 1)
var(df_cc$cbcl_scr_syn_internal_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_internal_r) %>% round(4)
## [1] 6.025142
var(df_cc$cbcl_scr_syn_external_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_external_r) %>% round(4)
## [1] 7.451029
var(df_cc$cbcl_scr_syn_anxdep_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_anxdep_r) %>% round(4)
## [1] 3.66044
var(df_cc$cbcl_scr_syn_withdep_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_withdep_r) %>% round(4)
## [1] 2.841002
var(df_cc$cbcl_scr_syn_attention_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_attention_r) %>% round(4)
## [1] 4.038821
var(df_cc$cbcl_scr_syn_rulebreak_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_rulebreak_r) %>% round(4)
## [1] 2.927258
var(df_cc$cbcl_scr_syn_aggressive_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_aggressive_r) %>% round(4)
## [1] 5.545168
var(df_cc$cbcl_scr_syn_totprob_r) %>% round(4)/mean(df_cc$cbcl_scr_syn_totprob_r) %>% round(4)
## [1] 17.28948

Percentage of 0’s for each CBCL Outcome

zeros <- df_cc %>% 
  group_by(eventname) %>% 
  dplyr::select(starts_with(c("eventname","cbcl"))) %>% 
  summarise_each(funs(sum(.==0)))
## Warning: `summarise_each_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `across()` instead.
## ℹ The deprecated feature was likely used in the dplyr package.
##   Please report the issue at <https://github.com/tidyverse/dplyr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
## 
## # Simple named list: list(mean = mean, median = median)
## 
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
## 
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
all <- df_cc %>% 
  group_by(eventname) %>% 
  dplyr::select(starts_with(c("eventname","cbcl"))) %>% 
  summarise_each(funs(n()))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
## 
## # Simple named list: list(mean = mean, median = median)
## 
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
## 
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
zero_percentage <- cbind(zeros[1],round(zeros[-1]/all[-1],4)*100)

zero_percentage
##   eventname cbcl_scr_syn_internal_r cbcl_scr_syn_external_r
## 1  Baseline                   16.32                   26.50
## 2    1-year                   15.31                   27.57
## 3    2-year                   17.62                   29.71
##   cbcl_scr_syn_anxdep_r cbcl_scr_syn_withdep_r cbcl_scr_syn_attention_r
## 1                 29.77                  55.18                    31.91
## 2                 29.46                  52.25                    32.53
## 3                 33.24                  51.74                    34.00
##   cbcl_scr_syn_rulebreak_r cbcl_scr_syn_aggressive_r cbcl_scr_syn_totprob_r
## 1                    50.78                     32.35                   4.21
## 2                    53.24                     33.30                   4.53
## 3                    56.08                     35.37                   5.44

Percentage of 0’s for all timepoints for each CBCL Outcome

#select certain columns
subject_sum <- df_cc %>% 
  dplyr::select(starts_with(c("subject","cbcl")))
#add up all scores across subjec
subject_summed <- ddply(subject_sum, "subjectid", numcolwise(sum))
#look at data
describe(subject_summed[,2:9])
##                           vars    n  mean    sd median trimmed   mad min max
## cbcl_scr_syn_internal_r      1 9273 13.31 13.36      9   11.06  8.90   0 130
## cbcl_scr_syn_external_r      2 9273 10.96 13.51      6    8.32  7.41   0 112
## cbcl_scr_syn_anxdep_r        3 9273  6.51  7.20      4    5.25  4.45   0  64
## cbcl_scr_syn_withdep_r       4 9273  2.91  4.12      1    2.03  1.48   0  33
## cbcl_scr_syn_attention_r     5 9273  7.57  8.45      5    6.09  7.41   0  53
## cbcl_scr_syn_rulebreak_r     6 9273  2.95  4.24      1    2.06  1.48   0  39
## cbcl_scr_syn_aggressive_r    7 9273  8.01  9.94      5    6.06  5.93   0  76
## cbcl_scr_syn_totprob_r       8 9273 46.03 43.09     33   38.92 31.13   0 370
##                           range skew kurtosis   se
## cbcl_scr_syn_internal_r     130 1.91     5.13 0.14
## cbcl_scr_syn_external_r     112 2.26     6.75 0.14
## cbcl_scr_syn_anxdep_r        64 1.93     5.06 0.07
## cbcl_scr_syn_withdep_r       33 2.31     6.65 0.04
## cbcl_scr_syn_attention_r     53 1.54     2.40 0.09
## cbcl_scr_syn_rulebreak_r     39 2.62     9.72 0.04
## cbcl_scr_syn_aggressive_r    76 2.15     5.90 0.10
## cbcl_scr_syn_totprob_r      370 1.81     4.31 0.45
#percentage of subjects who have zero across all timepoints
all_zeros <- as.data.frame(colSums(subject_summed == 0))
all_zeros$percentage_allzero <- all_zeros$`colSums(subject_summed == 0)`/nrow(subject_summed)
all_zeros$percentage_allzero <- round(all_zeros$percentage_allzero,4)*100
percentage_allzero <- all_zeros$percentage_allzero


#distribution
ggplot(subject_summed,aes(cbcl_scr_syn_internal_r)) + geom_histogram(binwidth = 1)

ggplot(subject_summed,aes(cbcl_scr_syn_external_r)) + geom_histogram(binwidth = 1)

ggplot(subject_summed,aes(cbcl_scr_syn_anxdep_r)) + geom_histogram(binwidth = 1)

ggplot(subject_summed,aes(cbcl_scr_syn_withdep_r)) + geom_histogram(binwidth = 1)

ggplot(subject_summed,aes(cbcl_scr_syn_attention_r)) + geom_histogram(binwidth = 1)

ggplot(subject_summed,aes(cbcl_scr_syn_rulebreak_r)) + geom_histogram(binwidth = 1)

ggplot(subject_summed,aes(cbcl_scr_syn_aggressive_r)) + geom_histogram(binwidth = 1)

ggplot(subject_summed,aes(cbcl_scr_syn_totprob_r)) + geom_histogram(binwidth = 1)

#Percentage of average people who have 0 at baseline and have 0 the whole time

zero_comparison <- rbind(zero_percentage, percentage_allzero)
## Warning in `[<-.factor`(`*tmp*`, ri, value = 0): invalid factor level, NA
## generated
zero_comparison$eventname <- as.character(zero_comparison$eventname)
zero_comparison[4,1] <- "All"
zero_comparison <- t(zero_comparison)
colnames(zero_comparison) <- c("Baseline","1-year","2-year","All")
zero_comparison <- as.data.frame(zero_comparison[2:9,])
zero_comparison <- lapply(zero_comparison,as.numeric)
zero_comparison$zeros_of_bl <- zero_comparison$All/zero_comparison$Baseline
zero_comparison$CBCL_Outcome <- c("cbcl_scr_syn_internal_r","cbcl_scr_syn_external_r","cbcl_scr_syn_anxdep_r","cbcl_scr_syn_withdep_r","cbcl_scr_syn_attention_r","cbcl_scr_syn_rulebreak_r","cbcl_scr_syn_aggressive_r","cbcl_scr_syn_totprob_r")

ICC’s for total sample and subjects with CBCL = 0

# Necessary libraries
# For lmer()
suppressPackageStartupMessages(library(lme4))
# For ICC
suppressPackageStartupMessages(library(performance))
# For repeatability estimate (rICC)
suppressPackageStartupMessages(library(rptR))

#internal
## all datapts
internal_icc_model <-lmer(cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid),
              data = df_cc,
              REML = TRUE)
summary(internal_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid)
##    Data: df_cc
## 
## REML criterion at convergence: 139913.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.7331 -0.4231 -0.1370  0.3163  7.5438 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 21.30    4.615   
##  Residual              10.09    3.177   
## Number of obs: 23857, groups:  subjectid, 9273
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  5.19071    0.05239   99.08
icc(internal_icc_model)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.679
##   Unadjusted ICC: 0.679
## zeros
internal_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_internal_r")) %>% filter(cbcl_scr_syn_internal_r==0)
internal_zero_ids <- unique(internal_zero_ids_df$subjectid)
internal_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_internal_r")) %>% filter(subjectid %in% internal_zero_ids)
internal_icc_model_zeros <-lmer(cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid),
              data = internal_zero,
              REML = TRUE)
summary(internal_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid)
##    Data: internal_zero
## 
## REML criterion at convergence: 30686.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4925 -0.5231 -0.4661  0.3179 11.9425 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 0.3517   0.593   
##  Residual              4.6593   2.159   
## Number of obs: 6902, groups:  subjectid, 2613
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1.23390    0.02854   43.23
icc(internal_icc_model_zeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.070
##   Unadjusted ICC: 0.070
#external
## all datapts
external_icc_model <-lmer(cbcl_scr_syn_external_r ~ 1 + (1 | subjectid),
              data = df_cc,
              REML = TRUE)
summary(external_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_external_r ~ 1 + (1 | subjectid)
##    Data: df_cc
## 
## REML criterion at convergence: 138094.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.5881 -0.3616 -0.1576  0.2581  8.4310 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 23.784   4.877   
##  Residual               8.522   2.919   
## Number of obs: 23857, groups:  subjectid, 9273
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  4.31205    0.05426   79.47
icc(external_icc_model)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.736
##   Unadjusted ICC: 0.736
## zeros
external_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_external_r")) %>% filter(cbcl_scr_syn_external_r==0)
external_zero_ids <- unique(external_zero_ids_df$subjectid)
external_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_external_r")) %>% filter(subjectid %in% external_zero_ids)
external_icc_model_zeros <-lmer(cbcl_scr_syn_external_r ~ 1 + (1 | subjectid),
              data = external_zero,
              REML = TRUE)
summary(external_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_external_r ~ 1 + (1 | subjectid)
##    Data: external_zero
## 
## REML criterion at convergence: 41969.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3217 -0.4121 -0.3631  0.1030 16.5952 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 0.3897   0.6242  
##  Residual              3.2472   1.8020  
## Number of obs: 10187, groups:  subjectid, 3841
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  0.88988    0.02058   43.24
icc(external_icc_model_zeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.107
##   Unadjusted ICC: 0.107
#anxdep
## all datapts
anxdep_icc_model <-lmer(cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid),
              data = df_cc,
              REML = TRUE)
summary(anxdep_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid)
##    Data: df_cc
## 
## REML criterion at convergence: 111637.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.9202 -0.4008 -0.2045  0.3574  6.8767 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 6.162    2.482   
##  Residual              3.166    1.779   
## Number of obs: 23857, groups:  subjectid, 9273
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  2.54257    0.02837   89.62
icc(anxdep_icc_model)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.661
##   Unadjusted ICC: 0.661
## zeros
anxdep_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_anxdep_r")) %>% filter(cbcl_scr_syn_anxdep_r==0)
anxdep_zero_ids <- unique(anxdep_zero_ids_df$subjectid)
anxdep_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_anxdep_r")) %>% filter(subjectid %in% anxdep_zero_ids)
anxdep_icc_model_zeros <-lmer(cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid),
              data = anxdep_zero,
              REML = TRUE)
summary(anxdep_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid)
##    Data: anxdep_zero
## 
## REML criterion at convergence: 39764.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9205 -0.4821 -0.4268  0.2226 11.4628 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 0.1733   0.4163  
##  Residual              1.8089   1.3450  
## Number of obs: 11309, groups:  subjectid, 4295
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   0.7390     0.0142   52.03
icc(anxdep_icc_model_zeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.087
##   Unadjusted ICC: 0.087
#withdep
## all datapts
withdep_icc_model <-lmer(cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid),
              data = df_cc,
              REML = TRUE)
summary(withdep_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid)
##    Data: df_cc
## 
## REML criterion at convergence: 88075.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.6284 -0.3950 -0.1783  0.2933  9.4069 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 1.964    1.402   
##  Residual              1.267    1.125   
## Number of obs: 23857, groups:  subjectid, 9273
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1.13401    0.01636    69.3
icc(withdep_icc_model)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.608
##   Unadjusted ICC: 0.608
## zeros
withdep_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_withdep_r")) %>% filter(cbcl_scr_syn_withdep_r==0)
withdep_zero_ids <- unique(withdep_zero_ids_df$subjectid)
withdep_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_withdep_r")) %>% filter(subjectid %in% withdep_zero_ids)
withdep_icc_model_zeros <-lmer(cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid),
              data = withdep_zero,
              REML = TRUE)
summary(withdep_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid)
##    Data: withdep_zero
## 
## REML criterion at convergence: 46227.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0703 -0.4434 -0.3531  0.2156 15.7045 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 0.0872   0.2953  
##  Residual              0.8103   0.9001  
## Number of obs: 16981, groups:  subjectid, 6501
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  0.42042    0.00785   53.56
icc(withdep_icc_model_zeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.097
##   Unadjusted ICC: 0.097
#attention
## all datapts
attention_icc_model <-lmer(cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid),
              data = df_cc,
              REML = TRUE)
summary(attention_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid)
##    Data: df_cc
## 
## REML criterion at convergence: 113290.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0804 -0.3522 -0.1670  0.3374  4.7753 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 9.090    3.015   
##  Residual              2.894    1.701   
## Number of obs: 23857, groups:  subjectid, 9273
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  2.96173    0.03331   88.92
icc(attention_icc_model)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.759
##   Unadjusted ICC: 0.759
## zeros
attention_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_attention_r")) %>% filter(cbcl_scr_syn_attention_r==0)
attention_zero_ids <- unique(attention_zero_ids_df$subjectid)
attention_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_attention_r")) %>% filter(subjectid %in% attention_zero_ids)
attention_icc_model_zeros <-lmer(cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid),
              data = attention_zero,
              REML = TRUE)
summary(attention_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid)
##    Data: attention_zero
## 
## REML criterion at convergence: 38271.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6213 -0.4568 -0.3921  0.2748 10.4620 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 0.1694   0.4116  
##  Residual              1.5769   1.2558  
## Number of obs: 11296, groups:  subjectid, 4307
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  0.65109    0.01343   48.48
icc(attention_icc_model_zeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.097
##   Unadjusted ICC: 0.097
#rulebreak
## all datapts
rulebreak_icc_model <-lmer(cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid),
              data = df_cc,
              REML = TRUE)
summary(rulebreak_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid)
##    Data: df_cc
## 
## REML criterion at convergence: 87731.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2835 -0.3999 -0.1589  0.2497 11.4049 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 2.253    1.501   
##  Residual              1.165    1.079   
## Number of obs: 23857, groups:  subjectid, 9273
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1.16649    0.01716   67.96
icc(rulebreak_icc_model)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.659
##   Unadjusted ICC: 0.659
## zeros
rulebreak_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_rulebreak_r")) %>% filter(cbcl_scr_syn_rulebreak_r==0)
rulebreak_zero_ids <- unique(rulebreak_zero_ids_df$subjectid)
rulebreak_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_rulebreak_r")) %>% filter(subjectid %in% rulebreak_zero_ids)
rulebreak_icc_model_zeros <-lmer(cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid),
              data = rulebreak_zero,
              REML = TRUE)
summary(rulebreak_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid)
##    Data: rulebreak_zero
## 
## REML criterion at convergence: 44269.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1406 -0.4507 -0.3568 -0.3568 16.6498 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 0.07883  0.2808  
##  Residual              0.73976  0.8601  
## Number of obs: 16830, groups:  subjectid, 6403
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  0.40502    0.00753   53.79
icc(rulebreak_icc_model_zeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.096
##   Unadjusted ICC: 0.096
#aggressive
## all datapts
aggressive_icc_model <-lmer(cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid),
              data = df_cc,
              REML = TRUE)
summary(aggressive_icc_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid)
##    Data: df_cc
## 
## REML criterion at convergence: 124034.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7104 -0.3608 -0.1606  0.2624  8.5561 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 12.715   3.566   
##  Residual               4.816   2.195   
## Number of obs: 23857, groups:  subjectid, 9273
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.14385    0.03982   78.95
icc(aggressive_icc_model)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.725
##   Unadjusted ICC: 0.725
## zeros
aggressive_zero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_aggressive_r")) %>% filter(cbcl_scr_syn_aggressive_r==0)
aggressive_zero_ids <- unique(aggressive_zero_ids_df$subjectid)
aggressive_zero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_aggressive_r")) %>% filter(subjectid %in% aggressive_zero_ids)
aggressive_icc_model_zeros <-lmer(cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid),
              data = aggressive_zero,
              REML = TRUE)
summary(aggressive_icc_model_zeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid)
##    Data: aggressive_zero
## 
## REML criterion at convergence: 43014.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1781 -0.4252 -0.3648  0.2057 12.7275 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 0.248    0.498   
##  Residual              2.092    1.446   
## Number of obs: 11691, groups:  subjectid, 4434
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  0.71521    0.01539   46.48
icc(aggressive_icc_model_zeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.106
##   Unadjusted ICC: 0.106

ICC’s for subjects with CBCL > 0

# Necessary libraries
# For lmer()
suppressPackageStartupMessages(library(lme4))
# For ICC
suppressPackageStartupMessages(library(performance))
# For repeatability estimate (rICC)
suppressPackageStartupMessages(library(rptR))

#internal
## nonzeros
internal_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_internal_r")) %>% filter(cbcl_scr_syn_internal_r>0)
internal_nonzero_ids <- unique(internal_nonzero_ids_df$subjectid)
internal_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_internal_r")) %>% filter(subjectid %in% internal_nonzero_ids)
internal_icc_model_nonzeros <-lmer(cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid),
              data = internal_nonzero,
              REML = TRUE)
summary(internal_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_internal_r ~ 1 + (1 | subjectid)
##    Data: internal_nonzero
## 
## REML criterion at convergence: 132669.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.5655 -0.4615 -0.1141  0.3359  7.4038 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 20.90    4.571   
##  Residual              10.65    3.264   
## Number of obs: 22511, groups:  subjectid, 8697
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  5.52520    0.05387   102.6
icc(internal_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.662
##   Unadjusted ICC: 0.662
#external
## nonzeros
external_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_external_r")) %>% filter(cbcl_scr_syn_external_r>0)
external_nonzero_ids <- unique(external_nonzero_ids_df$subjectid)
external_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_external_r")) %>% filter(subjectid %in% external_nonzero_ids)
external_icc_model_nonzeros <-lmer(cbcl_scr_syn_external_r ~ 1 + (1 | subjectid),
              data = external_nonzero,
              REML = TRUE)
summary(external_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_external_r ~ 1 + (1 | subjectid)
##    Data: external_nonzero
## 
## REML criterion at convergence: 120925
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.1355 -0.4037 -0.1097  0.2963  7.8976 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 24.191   4.918   
##  Residual               9.862   3.140   
## Number of obs: 20535, groups:  subjectid, 7928
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  5.03510    0.05966    84.4
icc(external_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.710
##   Unadjusted ICC: 0.710
#anxdep
## nonzeros
anxdep_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_anxdep_r")) %>% filter(cbcl_scr_syn_anxdep_r>0)
anxdep_nonzero_ids <- unique(anxdep_nonzero_ids_df$subjectid)
anxdep_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_anxdep_r")) %>% filter(subjectid %in% anxdep_nonzero_ids)
anxdep_icc_model_nonzeros <-lmer(cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid),
              data = anxdep_nonzero,
              REML = TRUE)
summary(anxdep_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_anxdep_r ~ 1 + (1 | subjectid)
##    Data: anxdep_nonzero
## 
## REML criterion at convergence: 96834.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.4485 -0.5208 -0.0904  0.3756  6.4347 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 5.928    2.435   
##  Residual              3.699    1.923   
## Number of obs: 20296, groups:  subjectid, 7809
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.00963    0.03083   97.63
icc(anxdep_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.616
##   Unadjusted ICC: 0.616
#withdep
## nonzeros
withdep_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_withdep_r")) %>% filter(cbcl_scr_syn_withdep_r>0)
withdep_nonzero_ids <- unique(withdep_nonzero_ids_df$subjectid)
withdep_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_withdep_r")) %>% filter(subjectid %in% withdep_nonzero_ids)
withdep_icc_model_nonzeros <-lmer(cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid),
              data = withdep_nonzero,
              REML = TRUE)
summary(withdep_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_withdep_r ~ 1 + (1 | subjectid)
##    Data: withdep_nonzero
## 
## REML criterion at convergence: 61960.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.3003 -0.4962 -0.1340  0.3966  7.7721 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 1.974    1.405   
##  Residual              1.927    1.388   
## Number of obs: 15539, groups:  subjectid, 5945
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1.75744    0.02148   81.83
icc(withdep_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.506
##   Unadjusted ICC: 0.506
#attention
## nonzeros
attention_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_attention_r")) %>% filter(cbcl_scr_syn_attention_r>0)
attention_nonzero_ids <- unique(attention_nonzero_ids_df$subjectid)
attention_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_attention_r")) %>% filter(subjectid %in% attention_nonzero_ids)
attention_icc_model_nonzeros <-lmer(cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid),
              data = attention_nonzero,
              REML = TRUE)
summary(attention_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_attention_r ~ 1 + (1 | subjectid)
##    Data: attention_nonzero
## 
## REML criterion at convergence: 94139
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6399 -0.5107 -0.0526  0.4245  4.3271 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 8.696    2.949   
##  Residual              3.545    1.883   
## Number of obs: 19352, groups:  subjectid, 7448
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.67853    0.03689   99.72
icc(attention_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.710
##   Unadjusted ICC: 0.710
#rulebreak
## nonzeros
rulebreak_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_rulebreak_r")) %>% filter(cbcl_scr_syn_rulebreak_r>0)
rulebreak_nonzero_ids <- unique(rulebreak_nonzero_ids_df$subjectid)
rulebreak_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_rulebreak_r")) %>% filter(subjectid %in% rulebreak_nonzero_ids)
rulebreak_icc_model_nonzeros <-lmer(cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid),
              data = rulebreak_nonzero,
              REML = TRUE)
summary(rulebreak_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_rulebreak_r ~ 1 + (1 | subjectid)
##    Data: rulebreak_nonzero
## 
## REML criterion at convergence: 62044.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0710 -0.4704 -0.1196  0.3152  9.5994 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 2.352    1.534   
##  Residual              1.773    1.332   
## Number of obs: 15592, groups:  subjectid, 6005
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   1.7930     0.0226   79.33
icc(rulebreak_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.570
##   Unadjusted ICC: 0.570
#aggressive
## nonzeros
aggressive_nonzero_ids_df <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_aggressive_r")) %>% filter(cbcl_scr_syn_aggressive_r>0)
aggressive_nonzero_ids <- unique(aggressive_nonzero_ids_df$subjectid)
aggressive_nonzero <- df_cc %>% dplyr::select(c("subjectid","cbcl_scr_syn_aggressive_r")) %>% filter(subjectid %in% aggressive_nonzero_ids)
aggressive_icc_model_nonzeros <-lmer(cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid),
              data = aggressive_nonzero,
              REML = TRUE)
summary(aggressive_icc_model_nonzeros)
## Linear mixed model fit by REML ['lmerMod']
## Formula: cbcl_scr_syn_aggressive_r ~ 1 + (1 | subjectid)
##    Data: aggressive_nonzero
## 
## REML criterion at convergence: 103482
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0814 -0.4505 -0.1032  0.3293  7.8256 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectid (Intercept) 12.875   3.588   
##  Residual               5.895   2.428   
## Number of obs: 19395, groups:  subjectid, 7473
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.8917     0.0452   86.09
icc(aggressive_icc_model_nonzeros)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.686
##   Unadjusted ICC: 0.686

Air Pollution Descriptives

#pm2.5
mean(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"])
## [1] 7.706378
sd(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"])
## [1] 1.570865
range(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"])
## [1]  1.722393 15.901970
hist(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"])

##test difference with EPA level
t.test(df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname=="Baseline"], mu = 12, alternative = "two.sided")
## 
##  One Sample t-test
## 
## data:  df_cc$reshist_addr1_pm252016aa_bl[df_cc$eventname == "Baseline"]
## t = -263.18, df = 9270, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 12
## 95 percent confidence interval:
##  7.674398 7.738358
## sample estimates:
## mean of x 
##  7.706378
#no2
mean(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"])
## [1] 18.59514
sd(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"])
## [1] 5.57146
range(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"])
## [1]  0.7291464 37.9399900
hist(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"])

##test difference with EPA level
t.test(df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname=="Baseline"], mu = 53, alternative = "two.sided")
## 
##  One Sample t-test
## 
## data:  df_cc$reshist_addr1_no2_2016_aavg_bl[df_cc$eventname == "Baseline"]
## t = -594.59, df = 9270, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 53
## 95 percent confidence interval:
##  18.48171 18.70857
## sample estimates:
## mean of x 
##  18.59514

Histogram of CBCL Outcomes by Wave

internal_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_internal_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Internalizing Behavior") +
  theme(axis.title.x = element_text(face="bold"))
external_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_external_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Externalizing Behavior") +
  theme(axis.title.x = element_text(face="bold"))
anxdep_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_anxdep_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Anxious/Depressed") +
  theme(axis.title.x = element_text(face="bold"))
withdep_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_withdep_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Withdrawn/Depressed") +
  theme(axis.title.x = element_text(face="bold"))
attention_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_attention_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Attention Problems") +
  theme(axis.title.x = element_text(face="bold"))
rulebreak_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_rulebreak_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Rule-breaking Behavior") +
  theme(axis.title.x = element_text(face="bold")) 
aggressive_histogram <- ggplot(df_cc, aes(x=cbcl_scr_syn_aggressive_r)) + geom_histogram() + facet_grid(~eventname) + labs(x="Aggressive Behavior") +
  theme(axis.title.x = element_text(face="bold")) 

#export above
##arrange plots
histograms <- grid.arrange(internal_histogram, anxdep_histogram, withdep_histogram, external_histogram, rulebreak_histogram, aggressive_histogram, attention_histogram, ncol=3) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##save
#ggsave(histograms,device="png",filename="Figures/CBCL_Histograms_2.7.23.png",width=12,height=12, units = "in")

Check linearity of non-zero cbcl scores with air pollution estimates

PM2.5

#internal
internal_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_internal_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_internal_r>0)
internal_linear_pm <- ggplot(internal_linear,aes(x=cbcl_scr_syn_internal_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="internal",y="pm2.5")
internal_linear_no <- ggplot(internal_linear,aes(x=cbcl_scr_syn_internal_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="internal",y="no2")

#anxdep
anxdep_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_anxdep_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_anxdep_r>0)
anxdep_linear_pm <- ggplot(anxdep_linear,aes(x=cbcl_scr_syn_anxdep_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="anxdep",y="pm2.5")
anxdep_linear_no <- ggplot(anxdep_linear,aes(x=cbcl_scr_syn_anxdep_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="anxdep",y="no2")

#withdep
withdep_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_withdep_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_withdep_r>0)
withdep_linear_pm <- ggplot(withdep_linear,aes(x=cbcl_scr_syn_withdep_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="withdep",y="pm2.5")
withdep_linear_no <- ggplot(withdep_linear,aes(x=cbcl_scr_syn_withdep_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="withdep",y="no2")

#external
external_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_external_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_external_r>0)
external_linear_pm <- ggplot(external_linear,aes(x=cbcl_scr_syn_external_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="external",y="pm2.5")
external_linear_no <- ggplot(external_linear,aes(x=cbcl_scr_syn_external_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="external",y="no2")

#rulebreak
rulebreak_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_rulebreak_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_rulebreak_r>0)
rulebreak_linear_pm <- ggplot(rulebreak_linear,aes(x=cbcl_scr_syn_rulebreak_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="rulebreak",y="pm2.5")
rulebreak_linear_no <- ggplot(rulebreak_linear,aes(x=cbcl_scr_syn_rulebreak_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="rulebreak",y="no2")

#aggressive
aggressive_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_aggressive_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_aggressive_r>0)
aggressive_linear_pm <- ggplot(aggressive_linear,aes(x=cbcl_scr_syn_aggressive_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="aggressive",y="pm2.5")
aggressive_linear_no <- ggplot(aggressive_linear,aes(x=cbcl_scr_syn_aggressive_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="aggressive",y="no2")

#attention
attention_linear <- df_cc %>% dplyr::select(c("subjectid","eventname","cbcl_scr_syn_attention_r","reshist_addr1_pm252016aa_bl","reshist_addr1_no2_2016_aavg_bl")) %>% filter(cbcl_scr_syn_attention_r>0)
attention_linear_pm <- ggplot(attention_linear,aes(x=cbcl_scr_syn_attention_r,y=reshist_addr1_pm252016aa_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="attention",y="pm2.5")
attention_linear_no <- ggplot(attention_linear,aes(x=cbcl_scr_syn_attention_r,y=reshist_addr1_no2_2016_aavg_bl)) + geom_point() + geom_smooth() + facet_grid(~eventname) + labs(x="attention",y="no2")

#all graphs
linearity_pm <- grid.arrange(internal_linear_pm, anxdep_linear_pm, withdep_linear_pm, external_linear_pm, rulebreak_linear_pm, aggressive_linear_pm, attention_linear_pm, ncol=1) 
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

linearity_no <- grid.arrange(internal_linear_no, anxdep_linear_no, withdep_linear_no, external_linear_no, rulebreak_linear_no, aggressive_linear_no, attention_linear_no, ncol=1) 
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

##save
#ggsave(linearity_pm,device="png",filename="Figures/CBCL.PM2p5_Linearity_6.3.23.png",width=12,height=12, units = "in")
#ggsave(linearity_no,device="png",filename="Figures/CBCL.NO2_Linearity_6.3.23.png",width=12,height=12, units = "in")

Internalizing

CBCL + AP Longitudinal Models

Zero-Inflated (ZI) Negative Binomial (NB): glmm.zinb in NBZIMM package.

CBCL only needs one nested random intercept since we eliminated the family nesting by choosing one kid per family.

For the negative binomial portion of the model, we do not nest by subject since the ICC across subjects is very low.

internal_zinb_r <- glmm.zinb(cbcl_scr_syn_internal_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
              zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 7 
## Computational time: 0.906 minutes
summary(internal_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
##   Data: df_cc 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)
## StdDev:   0.1021209
## 
##  Formula: ~1 | subjectid %in% abcd_site
##         (Intercept) Residual
## StdDev:   0.8517082 1.107373
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  cbcl_scr_syn_internal_r ~ interview_age.c9.y + race_ethnicity.bl +      high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm +      overall.income.bl + sex.bl 
##                                              Value  Std.Error    DF   t-value
## (Intercept)                              1.2627398 0.03311301 14511  38.13425
## interview_age.c9.y                       0.0024228 0.00438766 14511   0.55219
## race_ethnicity.blHispanic               -0.0327194 0.03110375  9309  -1.05194
## race_ethnicity.blBlack                  -0.3685892 0.03514211  9309 -10.48854
## race_ethnicity.blOther                  -0.0546427 0.03167776  9309  -1.72496
## high.educ.blBachelor                     0.0205633 0.02651289  9309   0.77560
## high.educ.blSome College                 0.0494810 0.03038591  9309   1.62842
## high.educ.blHS Diploma/GED              -0.1309304 0.04314274  9309  -3.03482
## high.educ.bl< HS Diploma                -0.1124291 0.05555319  9309  -2.02381
## prnt.empl.blStay at Home Parent          0.0311560 0.02695727  9309   1.15575
## prnt.empl.blUnemployed                   0.1285039 0.04442650  9309   2.89251
## prnt.empl.blOther                        0.1997699 0.03884139  9309   5.14322
## neighb_phenx_avg_p.bl.cm                -0.1174866 0.01121878  9309 -10.47231
## overall.income.bl[>=50K & <100K]         0.1076249 0.02683549  9309   4.01054
## overall.income.bl[<50k]                  0.1678532 0.03380331  9309   4.96558
## overall.income.bl[Don't Know or Refuse]  0.0690120 0.04256232  9309   1.62143
## sex.blFemale                             0.0499342 0.01970427  9309   2.53418
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.5808
## race_ethnicity.blHispanic                0.2929
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.0846
## high.educ.blBachelor                     0.4380
## high.educ.blSome College                 0.1035
## high.educ.blHS Diploma/GED               0.0024
## high.educ.bl< HS Diploma                 0.0430
## prnt.empl.blStay at Home Parent          0.2478
## prnt.empl.blUnemployed                   0.0038
## prnt.empl.blOther                        0.0000
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0001
## overall.income.bl[<50k]                  0.0000
## overall.income.bl[Don't Know or Refuse]  0.1050
## sex.blFemale                             0.0113
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.237                            
## race_ethnicity.blHispanic               -0.156  0.007                     
## race_ethnicity.blBlack                  -0.136  0.011  0.344              
## race_ethnicity.blOther                  -0.177  0.004  0.283  0.253       
## high.educ.blBachelor                    -0.272 -0.003 -0.018 -0.012 -0.001
## high.educ.blSome College                -0.186  0.003 -0.112 -0.084 -0.024
## high.educ.blHS Diploma/GED              -0.116  0.009 -0.144 -0.143 -0.006
## high.educ.bl< HS Diploma                -0.074  0.002 -0.171 -0.074 -0.010
## prnt.empl.blStay at Home Parent         -0.137  0.010  0.043  0.092  0.017
## prnt.empl.blUnemployed                  -0.053  0.004  0.008 -0.041  0.009
## prnt.empl.blOther                       -0.060  0.004  0.040  0.009 -0.012
## neighb_phenx_avg_p.bl.cm                -0.137 -0.005  0.044  0.153  0.047
## overall.income.bl[>=50K & <100K]        -0.214  0.000 -0.094 -0.062 -0.015
## overall.income.bl[<50k]                 -0.134  0.000 -0.151 -0.186 -0.082
## overall.income.bl[Don't Know or Refuse] -0.118 -0.001 -0.101 -0.126 -0.062
## sex.blFemale                            -0.290  0.006 -0.008 -0.017 -0.018
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.455                            
## high.educ.blHS Diploma/GED               0.330  0.493                     
## high.educ.bl< HS Diploma                 0.262  0.406  0.372              
## prnt.empl.blStay at Home Parent         -0.028 -0.014 -0.051 -0.094       
## prnt.empl.blUnemployed                  -0.008 -0.009 -0.068 -0.097  0.147
## prnt.empl.blOther                       -0.012 -0.033 -0.010 -0.019  0.158
## neighb_phenx_avg_p.bl.cm                -0.006  0.061  0.057  0.055  0.028
## overall.income.bl[>=50K & <100K]        -0.174 -0.277 -0.171 -0.114 -0.032
## overall.income.bl[<50k]                 -0.159 -0.418 -0.365 -0.308 -0.054
## overall.income.bl[Don't Know or Refuse] -0.100 -0.250 -0.237 -0.219 -0.077
## sex.blFemale                             0.015  0.023  0.015 -0.004 -0.006
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.131                            
## neighb_phenx_avg_p.bl.cm                 0.022  0.004                     
## overall.income.bl[>=50K & <100K]        -0.015 -0.050  0.085              
## overall.income.bl[<50k]                 -0.101 -0.140  0.156  0.504       
## overall.income.bl[Don't Know or Refuse] -0.078 -0.098  0.087  0.358  0.481
## sex.blFemale                             0.020  0.020  0.027 -0.006 -0.007
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.008
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -3.0404923 -0.7894731 -0.1971850  0.4346512  4.2592012 
## 
## Number of Observations: 23857
## Number of Groups: 
##                abcd_site subjectid %in% abcd_site 
##                       21                     9345
summary(internal_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
##   Data: data 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)  Residual
## StdDev:   0.3953535 0.6086933
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl +      prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl +      sex.bl 
##                                             Value  Std.Error    DF   t-value
## (Intercept)                             -4.095139 0.11408030 23820 -35.89698
## interview_age.c9.y                       0.126396 0.02281688 23820   5.53956
## race_ethnicity.blHispanic                0.104012 0.07676474 23820   1.35494
## race_ethnicity.blBlack                   0.907727 0.07370631 23820  12.31546
## race_ethnicity.blOther                   0.395088 0.07552753 23820   5.23104
## high.educ.blBachelor                     0.127581 0.06659833 23820   1.91568
## high.educ.blSome College                 0.046868 0.07603602 23820   0.61639
## high.educ.blHS Diploma/GED               0.589169 0.09129453 23820   6.45350
## high.educ.bl< HS Diploma                 0.812960 0.10625712 23820   7.65088
## prnt.empl.blStay at Home Parent         -0.046392 0.06526792 23820  -0.71079
## prnt.empl.blUnemployed                  -0.114445 0.09341298 23820  -1.22515
## prnt.empl.blOther                        0.027601 0.08717385 23820   0.31662
## neighb_phenx_avg_p.bl.cm                 0.248386 0.02659340 23820   9.34015
## overall.income.bl[>=50K & <100K]        -0.065052 0.06931711 23820  -0.93847
## overall.income.bl[<50k]                  0.019233 0.08040320 23820   0.23921
## overall.income.bl[Don't Know or Refuse]  0.306092 0.08945439 23820   3.42177
## sex.blFemale                            -0.073846 0.04608496 23820  -1.60239
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.1754
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.0000
## high.educ.blBachelor                     0.0554
## high.educ.blSome College                 0.5376
## high.educ.blHS Diploma/GED               0.0000
## high.educ.bl< HS Diploma                 0.0000
## prnt.empl.blStay at Home Parent          0.4772
## prnt.empl.blUnemployed                   0.2205
## prnt.empl.blOther                        0.7515
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.3480
## overall.income.bl[<50k]                  0.8109
## overall.income.bl[Don't Know or Refuse]  0.0006
## sex.blFemale                             0.1091
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.392                            
## race_ethnicity.blHispanic               -0.136  0.011                     
## race_ethnicity.blBlack                  -0.152  0.032  0.460              
## race_ethnicity.blOther                  -0.169  0.004  0.352  0.340       
## high.educ.blBachelor                    -0.215 -0.007 -0.012 -0.005  0.008
## high.educ.blSome College                -0.147  0.010 -0.115 -0.103 -0.007
## high.educ.blHS Diploma/GED              -0.116  0.027 -0.160 -0.162  0.002
## high.educ.bl< HS Diploma                -0.081  0.011 -0.190 -0.106 -0.003
## prnt.empl.blStay at Home Parent         -0.103  0.020  0.045  0.106  0.022
## prnt.empl.blUnemployed                  -0.041  0.010  0.017 -0.022  0.012
## prnt.empl.blOther                       -0.043  0.011  0.057  0.012 -0.009
## neighb_phenx_avg_p.bl.cm                -0.120 -0.001  0.031  0.147  0.038
## overall.income.bl[>=50K & <100K]        -0.150 -0.003 -0.110 -0.094 -0.027
## overall.income.bl[<50k]                 -0.104 -0.004 -0.157 -0.214 -0.082
## overall.income.bl[Don't Know or Refuse] -0.099  0.008 -0.121 -0.156 -0.070
## sex.blFemale                            -0.190  0.015 -0.002 -0.024 -0.017
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.479                            
## high.educ.blHS Diploma/GED               0.410  0.587                     
## high.educ.bl< HS Diploma                 0.359  0.526  0.543              
## prnt.empl.blStay at Home Parent         -0.024 -0.013 -0.057 -0.125       
## prnt.empl.blUnemployed                  -0.018 -0.005 -0.081 -0.120  0.177
## prnt.empl.blOther                       -0.017 -0.032 -0.021 -0.037  0.167
## neighb_phenx_avg_p.bl.cm                 0.002  0.050  0.052  0.060  0.031
## overall.income.bl[>=50K & <100K]        -0.165 -0.281 -0.205 -0.155 -0.023
## overall.income.bl[<50k]                 -0.159 -0.408 -0.412 -0.390 -0.051
## overall.income.bl[Don't Know or Refuse] -0.119 -0.296 -0.309 -0.288 -0.095
## sex.blFemale                             0.006  0.009  0.010 -0.013  0.012
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.149                            
## neighb_phenx_avg_p.bl.cm                 0.024 -0.008                     
## overall.income.bl[>=50K & <100K]        -0.014 -0.049  0.065              
## overall.income.bl[<50k]                 -0.081 -0.126  0.129  0.535       
## overall.income.bl[Don't Know or Refuse] -0.082 -0.110  0.074  0.437  0.604
## sex.blFemale                             0.035  0.015  0.020 -0.007 -0.003
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                            -0.004
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -0.8137244 -0.3078959 -0.2384644 -0.1774828 19.5714128 
## 
## Number of Observations: 23857
## Number of Groups: 21
anova(internal_zinb_r)
##                          numDF denDF   F-value p-value
## (Intercept)                  1 14511 2950.3403  <.0001
## interview_age.c9.y           1 14511    0.2593  0.6106
## race_ethnicity.bl            3  9309   19.5365  <.0001
## high.educ.bl                 4  9309   12.1290  <.0001
## prnt.empl.bl                 3  9309   15.4056  <.0001
## neighb_phenx_avg_p.bl.cm     1  9309  131.4105  <.0001
## overall.income.bl            3  9309    9.7692  <.0001
## sex.bl                       1  9309    6.4221  0.0113
VarCorr(internal_zinb_r)
##             Variance     StdDev   
## abcd_site = pdLogChol(1)          
## (Intercept) 0.01042867   0.1021209
## subjectid = pdLogChol(1)          
## (Intercept) 0.72540687   0.8517082
## Residual    1.22627555   1.1073733

Assumption checking for ZINB Models

  • Zero inflated negative binomial (zinb) regression already has overdispersion and excess zeros and this is accounted for in the zinb modeling chosen, “The data distribution combines the negative binomial distribution and the logit distribution”

  • Details on zinb can be found here: link

For Model Checking we will follow the following pdf: link This info is further detailed/published in books by Cameron and Trivedi (2013) and Hilbe (2014) and in Garay, Hashimoto, Ortega, and Lachos (2011).

They suggest using Pearson residuals.

#Check outlier/residuals with this df
internal_res <- df_cc
internal_res$level1_resid.raw <- residuals(internal_zinb_r)
internal_res$level1_resid.pearson <- residuals(internal_zinb_r, type="pearson")
#Add predicted values (Yhat)
internal_res$cbcl_scr_syn_internal_r_predicted <- predict(internal_zinb_r,internal_res,type="response")
#Incidence
internal_res$incidence <- estimate.probability(internal_res$cbcl_scr_syn_internal_r, method="empirical")

#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(internal_res$level1_resid.pearson)

Incidence vs. X’s Plots

“These plots show each of the independent variables plotted against the incidence as measured by Y (CBCL Outcome). They should be scanned for outliers and curvilinear patterns.”

#age
ggplot(internal_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

Residuals vs Y (CBCL Outcome) Plot

“This plot shows the residuals versus the dependent variable. It can be used to spot outliers.”

plot(internal_res$level1_resid.pearson, internal_res$cbcl_scr_syn_internal_r)

Residuals vs Yhat Plot

“This plot shows the residuals versus the predicted value (Yhat) of the dependent variable. It can show outliers.”

plot(internal_res$level1_resid.pearson, internal_res$cbcl_scr_syn_internal_r_predicted)

Residuals vs Row Plot

“This plot shows the residuals versus the row numbers. It is used to quickly spot rows that have large residuals.”

plot(as.numeric(rownames(internal_res)),internal_res$level1_resid.pearson)

Residuals vs X’s Plots

“These plots show the residuals plotted against the independent variables. They are used to spot outliers. They are also used to find curvilinear patterns that are not represented in the regression model.”

#age
ggplot(internal_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

For below models, view Internalizing above for notes.

Externalizing

CBCL + AP Longitudinal Models

external_zinb_r <- glmm.zinb(cbcl_scr_syn_external_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
              zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 9 
## Computational time: 1.169 minutes
summary(external_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
##   Data: df_cc 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)
## StdDev:   0.1254936
## 
##  Formula: ~1 | subjectid %in% abcd_site
##         (Intercept) Residual
## StdDev:    1.099871 1.058043
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  cbcl_scr_syn_external_r ~ interview_age.c9.y + race_ethnicity.bl +      high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm +      overall.income.bl + sex.bl 
##                                              Value  Std.Error    DF    t-value
## (Intercept)                              0.9429491 0.04128323 14511  22.840969
## interview_age.c9.y                      -0.0388390 0.00472466 14511  -8.220496
## race_ethnicity.blHispanic               -0.0658765 0.03973144  9309  -1.658045
## race_ethnicity.blBlack                  -0.1128041 0.04416806  9309  -2.553975
## race_ethnicity.blOther                  -0.0418034 0.04070132  9309  -1.027076
## high.educ.blBachelor                     0.1087898 0.03415121  9309   3.185532
## high.educ.blSome College                 0.1948234 0.03890717  9309   5.007390
## high.educ.blHS Diploma/GED               0.0686531 0.05457601  9309   1.257936
## high.educ.bl< HS Diploma                 0.0979019 0.07026626  9309   1.393299
## prnt.empl.blStay at Home Parent          0.0116881 0.03457543  9309   0.338048
## prnt.empl.blUnemployed                   0.1983548 0.05599885  9309   3.542122
## prnt.empl.blOther                        0.1971107 0.04941824  9309   3.988622
## neighb_phenx_avg_p.bl.cm                -0.1247092 0.01428901  9309  -8.727629
## overall.income.bl[>=50K & <100K]         0.1264413 0.03450719  9309   3.664202
## overall.income.bl[<50k]                  0.2620838 0.04312837  9309   6.076832
## overall.income.bl[Don't Know or Refuse]  0.1639139 0.05410510  9309   3.029547
## sex.blFemale                            -0.2978009 0.02525140  9309 -11.793441
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.0973
## race_ethnicity.blBlack                   0.0107
## race_ethnicity.blOther                   0.3044
## high.educ.blBachelor                     0.0014
## high.educ.blSome College                 0.0000
## high.educ.blHS Diploma/GED               0.2084
## high.educ.bl< HS Diploma                 0.1636
## prnt.empl.blStay at Home Parent          0.7353
## prnt.empl.blUnemployed                   0.0004
## prnt.empl.blOther                        0.0001
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0002
## overall.income.bl[<50k]                  0.0000
## overall.income.bl[Don't Know or Refuse]  0.0025
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.202                            
## race_ethnicity.blHispanic               -0.161  0.006                     
## race_ethnicity.blBlack                  -0.141  0.011  0.351              
## race_ethnicity.blOther                  -0.184  0.004  0.285  0.260       
## high.educ.blBachelor                    -0.282 -0.002 -0.021 -0.014 -0.002
## high.educ.blSome College                -0.195  0.003 -0.112 -0.085 -0.025
## high.educ.blHS Diploma/GED              -0.122  0.007 -0.145 -0.148 -0.009
## high.educ.bl< HS Diploma                -0.078  0.001 -0.173 -0.080 -0.013
## prnt.empl.blStay at Home Parent         -0.140  0.009  0.042  0.092  0.017
## prnt.empl.blUnemployed                  -0.053  0.003  0.008 -0.042  0.010
## prnt.empl.blOther                       -0.061  0.003  0.041  0.011 -0.011
## neighb_phenx_avg_p.bl.cm                -0.141 -0.004  0.046  0.152  0.049
## overall.income.bl[>=50K & <100K]        -0.223 -0.001 -0.091 -0.063 -0.012
## overall.income.bl[<50k]                 -0.141  0.000 -0.148 -0.186 -0.081
## overall.income.bl[Don't Know or Refuse] -0.125  0.000 -0.101 -0.127 -0.057
## sex.blFemale                            -0.288  0.005 -0.009 -0.019 -0.017
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.461                            
## high.educ.blHS Diploma/GED               0.339  0.503                     
## high.educ.bl< HS Diploma                 0.269  0.414  0.381              
## prnt.empl.blStay at Home Parent         -0.030 -0.015 -0.050 -0.096       
## prnt.empl.blUnemployed                  -0.009 -0.010 -0.069 -0.099  0.149
## prnt.empl.blOther                       -0.013 -0.033 -0.014 -0.020  0.159
## neighb_phenx_avg_p.bl.cm                -0.006  0.060  0.056  0.053  0.029
## overall.income.bl[>=50K & <100K]        -0.175 -0.276 -0.174 -0.116 -0.029
## overall.income.bl[<50k]                 -0.160 -0.418 -0.367 -0.311 -0.053
## overall.income.bl[Don't Know or Refuse] -0.100 -0.254 -0.241 -0.221 -0.076
## sex.blFemale                             0.013  0.022  0.014 -0.005 -0.006
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.134                            
## neighb_phenx_avg_p.bl.cm                 0.024  0.004                     
## overall.income.bl[>=50K & <100K]        -0.014 -0.049  0.084              
## overall.income.bl[<50k]                 -0.101 -0.139  0.158  0.508       
## overall.income.bl[Don't Know or Refuse] -0.079 -0.099  0.087  0.364  0.489
## sex.blFemale                             0.018  0.017  0.028 -0.006 -0.007
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.007
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -3.1969701 -0.7289727 -0.2621957  0.3860818  4.4722136 
## 
## Number of Observations: 23857
## Number of Groups: 
##                abcd_site subjectid %in% abcd_site 
##                       21                     9345
summary(external_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
##   Data: data 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)  Residual
## StdDev:   0.2626422 0.5426526
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl +      prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl +      sex.bl 
##                                             Value  Std.Error    DF   t-value
## (Intercept)                             -4.099752 0.08505026 23820 -48.20388
## interview_age.c9.y                       0.214792 0.01927783 23820  11.14192
## race_ethnicity.blHispanic                0.219816 0.06303372 23820   3.48728
## race_ethnicity.blBlack                   0.607992 0.06540608 23820   9.29565
## race_ethnicity.blOther                   0.376809 0.06093286 23820   6.18400
## high.educ.blBachelor                     0.107057 0.05363891 23820   1.99588
## high.educ.blSome College                 0.148441 0.06230576 23820   2.38247
## high.educ.blHS Diploma/GED               0.415366 0.08145102 23820   5.09959
## high.educ.bl< HS Diploma                 0.546205 0.09850925 23820   5.54471
## prnt.empl.blStay at Home Parent          0.025767 0.05387006 23820   0.47832
## prnt.empl.blUnemployed                   0.020936 0.08281601 23820   0.25280
## prnt.empl.blOther                       -0.298640 0.08717990 23820  -3.42556
## neighb_phenx_avg_p.bl.cm                 0.200864 0.02304520 23820   8.71610
## overall.income.bl[>=50K & <100K]        -0.202154 0.05599558 23820  -3.61018
## overall.income.bl[<50k]                 -0.158909 0.06768490 23820  -2.34777
## overall.income.bl[Don't Know or Refuse]  0.175260 0.07675483 23820   2.28338
## sex.blFemale                             0.226257 0.03896374 23820   5.80687
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.0005
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.0000
## high.educ.blBachelor                     0.0460
## high.educ.blSome College                 0.0172
## high.educ.blHS Diploma/GED               0.0000
## high.educ.bl< HS Diploma                 0.0000
## prnt.empl.blStay at Home Parent          0.6324
## prnt.empl.blUnemployed                   0.8004
## prnt.empl.blOther                        0.0006
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0003
## overall.income.bl[<50k]                  0.0189
## overall.income.bl[Don't Know or Refuse]  0.0224
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.464                            
## race_ethnicity.blHispanic               -0.153  0.014                     
## race_ethnicity.blBlack                  -0.154  0.033  0.403              
## race_ethnicity.blOther                  -0.180  0.006  0.333  0.304       
## high.educ.blBachelor                    -0.231 -0.007 -0.007 -0.004  0.013
## high.educ.blSome College                -0.160  0.007 -0.117 -0.092 -0.008
## high.educ.blHS Diploma/GED              -0.115  0.023 -0.156 -0.152  0.002
## high.educ.bl< HS Diploma                -0.080  0.010 -0.176 -0.091  0.000
## prnt.empl.blStay at Home Parent         -0.117  0.022  0.047  0.105  0.021
## prnt.empl.blUnemployed                  -0.048  0.009  0.013 -0.033  0.012
## prnt.empl.blOther                       -0.040  0.008  0.042  0.002 -0.013
## neighb_phenx_avg_p.bl.cm                -0.137 -0.001  0.037  0.146  0.038
## overall.income.bl[>=50K & <100K]        -0.148 -0.001 -0.099 -0.082 -0.022
## overall.income.bl[<50k]                 -0.096 -0.001 -0.146 -0.204 -0.082
## overall.income.bl[Don't Know or Refuse] -0.094  0.008 -0.110 -0.142 -0.066
## sex.blFemale                            -0.252  0.015 -0.003 -0.017 -0.011
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.460                            
## high.educ.blHS Diploma/GED               0.363  0.539                     
## high.educ.bl< HS Diploma                 0.306  0.470  0.465              
## prnt.empl.blStay at Home Parent         -0.031 -0.020 -0.058 -0.116       
## prnt.empl.blUnemployed                  -0.016 -0.009 -0.078 -0.116  0.163
## prnt.empl.blOther                       -0.018 -0.027 -0.010 -0.028  0.140
## neighb_phenx_avg_p.bl.cm                -0.001  0.053  0.052  0.060  0.030
## overall.income.bl[>=50K & <100K]        -0.159 -0.284 -0.189 -0.136 -0.022
## overall.income.bl[<50k]                 -0.150 -0.413 -0.397 -0.357 -0.048
## overall.income.bl[Don't Know or Refuse] -0.108 -0.285 -0.289 -0.271 -0.092
## sex.blFemale                             0.013  0.017  0.012 -0.004  0.004
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.124                            
## neighb_phenx_avg_p.bl.cm                 0.030 -0.004                     
## overall.income.bl[>=50K & <100K]        -0.012 -0.041  0.076              
## overall.income.bl[<50k]                 -0.088 -0.117  0.139  0.490       
## overall.income.bl[Don't Know or Refuse] -0.083 -0.099  0.089  0.388  0.546
## sex.blFemale                             0.030  0.012  0.025 -0.006 -0.004
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.000
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -0.7850906 -0.3556462 -0.2854172  0.2325046 17.3825587 
## 
## Number of Observations: 23857
## Number of Groups: 21
anova(external_zinb_r)
##                          numDF denDF  F-value p-value
## (Intercept)                  1 14511 966.2379  <.0001
## interview_age.c9.y           1 14511  69.8714  <.0001
## race_ethnicity.bl            3  9309   6.2238   3e-04
## high.educ.bl                 4  9309  31.8134  <.0001
## prnt.empl.bl                 3  9309  15.2470  <.0001
## neighb_phenx_avg_p.bl.cm     1  9309  89.7652  <.0001
## overall.income.bl            3  9309  12.1165  <.0001
## sex.bl                       1  9309 139.0852  <.0001
VarCorr(external_zinb_r)
##             Variance     StdDev   
## abcd_site = pdLogChol(1)          
## (Intercept) 0.01574865   0.1254936
## subjectid = pdLogChol(1)          
## (Intercept) 1.20971568   1.0998708
## Residual    1.11945536   1.0580432

Assumption checking for ZINB Models

#Check outlier/residuals with this df
external_res <- df_cc
external_res$level1_resid.raw <- residuals(external_zinb_r)
external_res$level1_resid.pearson <- residuals(external_zinb_r, type="pearson")
#Add predicted values (Yhat)
external_res$cbcl_scr_syn_external_r_predicted <- predict(external_zinb_r,external_res,type="response")
#Incidence
external_res$incidence <- estimate.probability(external_res$cbcl_scr_syn_external_r, method="empirical")

#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(external_res$level1_resid.pearson)

### Incidence vs. X’s Plots

#age
ggplot(external_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

### Residuals vs Y (CBCL Outcome) Plot

plot(external_res$level1_resid.pearson, external_res$cbcl_scr_syn_external_r)

### Residuals vs Yhat Plot

plot(external_res$level1_resid.pearson, external_res$cbcl_scr_syn_external_r_predicted)

### Residuals vs Row Plot

plot(as.numeric(rownames(external_res)),external_res$level1_resid.pearson)

### Residuals vs X’s Plots

#age
ggplot(external_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

Anxious/Depressed

CBCL + AP Longitudinal Models

anxdep_zinb_r <- glmm.zinb(cbcl_scr_syn_anxdep_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
              zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 11 
## Computational time: 1.398 minutes
summary(anxdep_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
##   Data: df_cc 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)
## StdDev:   0.1121779
## 
##  Formula: ~1 | subjectid %in% abcd_site
##         (Intercept)  Residual
## StdDev:   0.9726568 0.9392575
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  cbcl_scr_syn_anxdep_r ~ interview_age.c9.y + race_ethnicity.bl +      high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm +      overall.income.bl + sex.bl 
##                                              Value  Std.Error    DF    t-value
## (Intercept)                              0.5937127 0.03739623 14511  15.876270
## interview_age.c9.y                      -0.0243781 0.00506976 14511  -4.808528
## race_ethnicity.blHispanic               -0.0249894 0.03595399  9309  -0.695038
## race_ethnicity.blBlack                  -0.4486266 0.04109866  9309 -10.915845
## race_ethnicity.blOther                  -0.1019393 0.03675753  9309  -2.773289
## high.educ.blBachelor                    -0.0277787 0.03066520  9309  -0.905872
## high.educ.blSome College                -0.0537389 0.03524909  9309  -1.524548
## high.educ.blHS Diploma/GED              -0.2846188 0.05037691  9309  -5.649788
## high.educ.bl< HS Diploma                -0.2767381 0.06496771  9309  -4.259625
## prnt.empl.blStay at Home Parent          0.0402005 0.03126878  9309   1.285645
## prnt.empl.blUnemployed                   0.1838573 0.05155648  9309   3.566134
## prnt.empl.blOther                        0.1508819 0.04528546  9309   3.331796
## neighb_phenx_avg_p.bl.cm                -0.1080045 0.01304065  9309  -8.282143
## overall.income.bl[>=50K & <100K]         0.1111251 0.03105252  9309   3.578617
## overall.income.bl[<50k]                  0.1582101 0.03925333  9309   4.030488
## overall.income.bl[Don't Know or Refuse]  0.0311890 0.04956838  9309   0.629212
## sex.blFemale                             0.0552329 0.02286829  9309   2.415260
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.4870
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.0056
## high.educ.blBachelor                     0.3650
## high.educ.blSome College                 0.1274
## high.educ.blHS Diploma/GED               0.0000
## high.educ.bl< HS Diploma                 0.0000
## prnt.empl.blStay at Home Parent          0.1986
## prnt.empl.blUnemployed                   0.0004
## prnt.empl.blOther                        0.0009
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0003
## overall.income.bl[<50k]                  0.0001
## overall.income.bl[Don't Know or Refuse]  0.5292
## sex.blFemale                             0.0157
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.240                            
## race_ethnicity.blHispanic               -0.159  0.008                     
## race_ethnicity.blBlack                  -0.139  0.012  0.340              
## race_ethnicity.blOther                  -0.181  0.004  0.282  0.250       
## high.educ.blBachelor                    -0.276 -0.002 -0.018 -0.011  0.000
## high.educ.blSome College                -0.188  0.003 -0.110 -0.082 -0.024
## high.educ.blHS Diploma/GED              -0.116  0.008 -0.142 -0.142 -0.006
## high.educ.bl< HS Diploma                -0.074  0.002 -0.171 -0.071 -0.011
## prnt.empl.blStay at Home Parent         -0.140  0.011  0.042  0.092  0.019
## prnt.empl.blUnemployed                  -0.055  0.004  0.008 -0.041  0.009
## prnt.empl.blOther                       -0.062  0.004  0.040  0.009 -0.011
## neighb_phenx_avg_p.bl.cm                -0.142 -0.004  0.045  0.155  0.046
## overall.income.bl[>=50K & <100K]        -0.219  0.000 -0.094 -0.061 -0.015
## overall.income.bl[<50k]                 -0.137  0.000 -0.152 -0.184 -0.083
## overall.income.bl[Don't Know or Refuse] -0.120 -0.001 -0.100 -0.123 -0.063
## sex.blFemale                            -0.298  0.005 -0.008 -0.018 -0.018
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.453                            
## high.educ.blHS Diploma/GED               0.327  0.488                     
## high.educ.bl< HS Diploma                 0.259  0.402  0.367              
## prnt.empl.blStay at Home Parent         -0.031 -0.015 -0.053 -0.095       
## prnt.empl.blUnemployed                  -0.008 -0.009 -0.069 -0.098  0.147
## prnt.empl.blOther                       -0.013 -0.033 -0.011 -0.020  0.157
## neighb_phenx_avg_p.bl.cm                -0.007  0.062  0.058  0.055  0.029
## overall.income.bl[>=50K & <100K]        -0.175 -0.277 -0.169 -0.112 -0.031
## overall.income.bl[<50k]                 -0.160 -0.419 -0.363 -0.307 -0.052
## overall.income.bl[Don't Know or Refuse] -0.100 -0.249 -0.234 -0.216 -0.076
## sex.blFemale                             0.015  0.023  0.015 -0.003 -0.006
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.131                            
## neighb_phenx_avg_p.bl.cm                 0.024  0.005                     
## overall.income.bl[>=50K & <100K]        -0.015 -0.049  0.085              
## overall.income.bl[<50k]                 -0.102 -0.140  0.156  0.503       
## overall.income.bl[Don't Know or Refuse] -0.078 -0.097  0.087  0.355  0.476
## sex.blFemale                             0.020  0.020  0.027 -0.006 -0.007
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.007
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.8041137 -0.7602766 -0.2203188  0.4366498  4.4144158 
## 
## Number of Observations: 23857
## Number of Groups: 
##                abcd_site subjectid %in% abcd_site 
##                       21                     9345
summary(anxdep_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
##   Data: data 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)  Residual
## StdDev:   0.3811096 0.4309838
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl +      prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl +      sex.bl 
##                                             Value  Std.Error    DF   t-value
## (Intercept)                             -5.007919 0.10640506 23820 -47.06467
## interview_age.c9.y                       0.352666 0.01871702 23820  18.84198
## race_ethnicity.blHispanic                0.069959 0.06425351 23820   1.08879
## race_ethnicity.blBlack                   1.089479 0.05885231 23820  18.51208
## race_ethnicity.blOther                   0.342804 0.06465785 23820   5.30182
## high.educ.blBachelor                     0.347615 0.05660319 23820   6.14127
## high.educ.blSome College                 0.301698 0.06302923 23820   4.78663
## high.educ.blHS Diploma/GED               0.515374 0.07714294 23820   6.68077
## high.educ.bl< HS Diploma                 0.927745 0.08551122 23820  10.84940
## prnt.empl.blStay at Home Parent          0.016360 0.05344281 23820   0.30612
## prnt.empl.blUnemployed                  -0.064903 0.07214538 23820  -0.89961
## prnt.empl.blOther                        0.079382 0.06762996 23820   1.17376
## neighb_phenx_avg_p.bl.cm                 0.300654 0.02156670 23820  13.94068
## overall.income.bl[>=50K & <100K]        -0.174903 0.06026414 23820  -2.90228
## overall.income.bl[<50k]                  0.309110 0.06461715 23820   4.78371
## overall.income.bl[Don't Know or Refuse]  0.338358 0.07467982 23820   4.53079
## sex.blFemale                            -0.321314 0.03823591 23820  -8.40346
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.2763
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.0000
## high.educ.blBachelor                     0.0000
## high.educ.blSome College                 0.0000
## high.educ.blHS Diploma/GED               0.0000
## high.educ.bl< HS Diploma                 0.0000
## prnt.empl.blStay at Home Parent          0.7595
## prnt.empl.blUnemployed                   0.3683
## prnt.empl.blOther                        0.2405
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0037
## overall.income.bl[<50k]                  0.0000
## overall.income.bl[Don't Know or Refuse]  0.0000
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.387                            
## race_ethnicity.blHispanic               -0.126  0.012                     
## race_ethnicity.blBlack                  -0.151  0.043  0.490              
## race_ethnicity.blOther                  -0.153  0.005  0.357  0.365       
## high.educ.blBachelor                    -0.228 -0.003 -0.015  0.000  0.009
## high.educ.blSome College                -0.167  0.015 -0.106 -0.093 -0.006
## high.educ.blHS Diploma/GED              -0.131  0.028 -0.142 -0.146  0.003
## high.educ.bl< HS Diploma                -0.103  0.014 -0.177 -0.104 -0.008
## prnt.empl.blStay at Home Parent         -0.100  0.026  0.047  0.105  0.026
## prnt.empl.blUnemployed                  -0.038  0.011  0.014 -0.014  0.016
## prnt.empl.blOther                       -0.040  0.010  0.059  0.012 -0.003
## neighb_phenx_avg_p.bl.cm                -0.114  0.006  0.027  0.157  0.039
## overall.income.bl[>=50K & <100K]        -0.137 -0.005 -0.107 -0.101 -0.028
## overall.income.bl[<50k]                 -0.104  0.001 -0.168 -0.223 -0.087
## overall.income.bl[Don't Know or Refuse] -0.099  0.013 -0.121 -0.155 -0.069
## sex.blFemale                            -0.149  0.013 -0.003 -0.032 -0.020
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.543                            
## high.educ.blHS Diploma/GED               0.454  0.623                     
## high.educ.bl< HS Diploma                 0.414  0.577  0.558              
## prnt.empl.blStay at Home Parent         -0.019 -0.009 -0.050 -0.120       
## prnt.empl.blUnemployed                  -0.022 -0.003 -0.082 -0.118  0.185
## prnt.empl.blOther                       -0.014 -0.029 -0.023 -0.028  0.175
## neighb_phenx_avg_p.bl.cm                 0.005  0.051  0.046  0.058  0.036
## overall.income.bl[>=50K & <100K]        -0.158 -0.272 -0.194 -0.157 -0.019
## overall.income.bl[<50k]                 -0.167 -0.419 -0.392 -0.379 -0.050
## overall.income.bl[Don't Know or Refuse] -0.121 -0.293 -0.283 -0.272 -0.087
## sex.blFemale                             0.009  0.009  0.013 -0.012  0.015
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.162                            
## neighb_phenx_avg_p.bl.cm                 0.033 -0.013                     
## overall.income.bl[>=50K & <100K]        -0.010 -0.040  0.057              
## overall.income.bl[<50k]                 -0.082 -0.129  0.128  0.550       
## overall.income.bl[Don't Know or Refuse] -0.081 -0.114  0.071  0.435  0.615
## sex.blFemale                             0.037  0.017  0.016  0.001 -0.002
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                            -0.003
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.4245472 -0.3355471 -0.2311088  0.2379168 38.4888000 
## 
## Number of Observations: 23857
## Number of Groups: 21
anova(anxdep_zinb_r)
##                          numDF denDF  F-value p-value
## (Intercept)                  1 14511 394.8857  <.0001
## interview_age.c9.y           1 14511  22.2878  <.0001
## race_ethnicity.bl            3  9309  33.3004  <.0001
## high.educ.bl                 4  9309   5.8283  0.0001
## prnt.empl.bl                 3  9309  10.2481  <.0001
## neighb_phenx_avg_p.bl.cm     1  9309  82.5352  <.0001
## overall.income.bl            3  9309   7.6171  <.0001
## sex.bl                       1  9309   5.8335  0.0157

Assumption checking for ZINB Models

#Check outlier/residuals with this df
anxdep_res <- df_cc
anxdep_res$level1_resid.raw <- residuals(anxdep_zinb_r)
anxdep_res$level1_resid.pearson <- residuals(anxdep_zinb_r, type="pearson")
#Add predicted values (Yhat)
anxdep_res$cbcl_scr_syn_anxdep_r_predicted <- predict(anxdep_zinb_r,anxdep_res,type="response")
#Incidence
anxdep_res$incidence <- estimate.probability(anxdep_res$cbcl_scr_syn_anxdep_r, method="empirical")

#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(anxdep_res$level1_resid.pearson)

### Incidence vs. X’s Plots

#age
ggplot(anxdep_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

### Residuals vs Y (CBCL Outcome) Plot

plot(anxdep_res$level1_resid.pearson, anxdep_res$cbcl_scr_syn_anxdep_r)

### Residuals vs Yhat Plot

plot(anxdep_res$level1_resid.pearson, anxdep_res$cbcl_scr_syn_anxdep_r_predicted)

### Residuals vs Row Plot

plot(as.numeric(rownames(anxdep_res)),anxdep_res$level1_resid.pearson)

### Residuals vs X’s Plots

#age
ggplot(anxdep_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

Withdrawn/Depressed

CBCL + AP Longitudinal Models

withdep_zinb_r <- glmm.zinb(cbcl_scr_syn_withdep_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
              zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 15 
## Computational time: 1.983 minutes
summary(withdep_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
##   Data: df_cc 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)
## StdDev:   0.1118689
## 
##  Formula: ~1 | subjectid %in% abcd_site
##         (Intercept)  Residual
## StdDev:    1.206147 0.8065221
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  cbcl_scr_syn_withdep_r ~ interview_age.c9.y + race_ethnicity.bl +      high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm +      overall.income.bl + sex.bl 
##                                              Value  Std.Error    DF    t-value
## (Intercept)                             -0.8180659 0.04435421 14511 -18.443932
## interview_age.c9.y                       0.0936966 0.00648940 14511  14.438407
## race_ethnicity.blHispanic               -0.0168186 0.04588625  9309  -0.366528
## race_ethnicity.blBlack                  -0.3142541 0.05202813  9309  -6.040081
## race_ethnicity.blOther                   0.0169591 0.04710236  9309   0.360047
## high.educ.blBachelor                     0.0774744 0.04008315  9309   1.932843
## high.educ.blSome College                 0.1951026 0.04545551  9309   4.292166
## high.educ.blHS Diploma/GED               0.0918854 0.06365579  9309   1.443472
## high.educ.bl< HS Diploma                 0.1290412 0.08120372  9309   1.589104
## prnt.empl.blStay at Home Parent          0.0784586 0.04011413  9309   1.955884
## prnt.empl.blUnemployed                   0.2379993 0.06480057  9309   3.672796
## prnt.empl.blOther                        0.2794945 0.05706447  9309   4.897872
## neighb_phenx_avg_p.bl.cm                -0.1416896 0.01656597  9309  -8.553053
## overall.income.bl[>=50K & <100K]         0.1598888 0.04039175  9309   3.958453
## overall.income.bl[<50k]                  0.2802247 0.05024633  9309   5.577019
## overall.income.bl[Don't Know or Refuse]  0.2424366 0.06309744  9309   3.842257
## sex.blFemale                            -0.0720659 0.02947982  9309  -2.444584
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.7140
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.7188
## high.educ.blBachelor                     0.0533
## high.educ.blSome College                 0.0000
## high.educ.blHS Diploma/GED               0.1489
## high.educ.bl< HS Diploma                 0.1121
## prnt.empl.blStay at Home Parent          0.0505
## prnt.empl.blUnemployed                   0.0002
## prnt.empl.blOther                        0.0000
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0001
## overall.income.bl[<50k]                  0.0000
## overall.income.bl[Don't Know or Refuse]  0.0001
## sex.blFemale                             0.0145
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.272                            
## race_ethnicity.blHispanic               -0.171  0.008                     
## race_ethnicity.blBlack                  -0.151  0.012  0.357              
## race_ethnicity.blOther                  -0.200  0.005  0.287  0.259       
## high.educ.blBachelor                    -0.312 -0.002 -0.019 -0.012 -0.001
## high.educ.blSome College                -0.215  0.003 -0.115 -0.089 -0.024
## high.educ.blHS Diploma/GED              -0.136  0.010 -0.148 -0.150 -0.008
## high.educ.bl< HS Diploma                -0.089  0.002 -0.180 -0.080 -0.011
## prnt.empl.blStay at Home Parent         -0.154  0.012  0.039  0.091  0.016
## prnt.empl.blUnemployed                  -0.059  0.004  0.005 -0.044  0.008
## prnt.empl.blOther                       -0.070  0.005  0.042  0.010 -0.009
## neighb_phenx_avg_p.bl.cm                -0.151 -0.006  0.048  0.157  0.049
## overall.income.bl[>=50K & <100K]        -0.248  0.000 -0.091 -0.060 -0.009
## overall.income.bl[<50k]                 -0.158  0.001 -0.152 -0.185 -0.080
## overall.income.bl[Don't Know or Refuse] -0.137 -0.001 -0.108 -0.129 -0.064
## sex.blFemale                            -0.320  0.006 -0.009 -0.015 -0.018
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.464                            
## high.educ.blHS Diploma/GED               0.341  0.508                     
## high.educ.bl< HS Diploma                 0.273  0.421  0.388              
## prnt.empl.blStay at Home Parent         -0.028 -0.014 -0.051 -0.095       
## prnt.empl.blUnemployed                  -0.006 -0.007 -0.068 -0.099  0.152
## prnt.empl.blOther                       -0.010 -0.032 -0.008 -0.017  0.162
## neighb_phenx_avg_p.bl.cm                -0.007  0.061  0.058  0.056  0.029
## overall.income.bl[>=50K & <100K]        -0.174 -0.276 -0.172 -0.117 -0.031
## overall.income.bl[<50k]                 -0.162 -0.419 -0.370 -0.313 -0.053
## overall.income.bl[Don't Know or Refuse] -0.102 -0.256 -0.243 -0.227 -0.078
## sex.blFemale                             0.017  0.024  0.015 -0.002 -0.005
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.135                            
## neighb_phenx_avg_p.bl.cm                 0.020  0.006                     
## overall.income.bl[>=50K & <100K]        -0.017 -0.051  0.082              
## overall.income.bl[<50k]                 -0.100 -0.142  0.156  0.512       
## overall.income.bl[Don't Know or Refuse] -0.080 -0.100  0.089  0.369  0.496
## sex.blFemale                             0.018  0.020  0.030 -0.008 -0.006
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.007
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.7594639 -0.6292970 -0.5046082  0.3868743  4.6633112 
## 
## Number of Observations: 23857
## Number of Groups: 
##                abcd_site subjectid %in% abcd_site 
##                       21                     9345
summary(withdep_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
##   Data: data 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)  Residual
## StdDev:   0.5317109 0.2651273
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl +      prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl +      sex.bl 
##                                             Value  Std.Error    DF   t-value
## (Intercept)                             -3.417296 0.12006310 23820 -28.46250
## interview_age.c9.y                      -0.203403 0.01062229 23820 -19.14873
## race_ethnicity.blHispanic                0.168921 0.03310490 23820   5.10261
## race_ethnicity.blBlack                   0.582184 0.03568706 23820  16.31358
## race_ethnicity.blOther                   0.012980 0.03537404 23820   0.36692
## high.educ.blBachelor                     0.287193 0.02669407 23820  10.75867
## high.educ.blSome College                 0.314541 0.03248792 23820   9.68179
## high.educ.blHS Diploma/GED               0.219772 0.04896009 23820   4.48880
## high.educ.bl< HS Diploma                 0.664195 0.05823189 23820  11.40603
## prnt.empl.blStay at Home Parent         -0.612322 0.03525756 23820 -17.36710
## prnt.empl.blUnemployed                  -0.139876 0.05008273 23820  -2.79290
## prnt.empl.blOther                       -0.033356 0.04308024 23820  -0.77428
## neighb_phenx_avg_p.bl.cm                 0.460628 0.01395288 23820  33.01309
## overall.income.bl[>=50K & <100K]        -0.493979 0.02931562 23820 -16.85036
## overall.income.bl[<50k]                 -0.831531 0.03886008 23820 -21.39807
## overall.income.bl[Don't Know or Refuse]  0.316985 0.03751395 23820   8.44979
## sex.blFemale                             0.218279 0.02088107 23820  10.45342
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.0000
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.7137
## high.educ.blBachelor                     0.0000
## high.educ.blSome College                 0.0000
## high.educ.blHS Diploma/GED               0.0000
## high.educ.bl< HS Diploma                 0.0000
## prnt.empl.blStay at Home Parent          0.0000
## prnt.empl.blUnemployed                   0.0052
## prnt.empl.blOther                        0.4388
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0000
## overall.income.bl[<50k]                  0.0000
## overall.income.bl[Don't Know or Refuse]  0.0000
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.144                            
## race_ethnicity.blHispanic               -0.053  0.015                     
## race_ethnicity.blBlack                  -0.049  0.031  0.346              
## race_ethnicity.blOther                  -0.058  0.010  0.276  0.226       
## high.educ.blBachelor                    -0.086 -0.017 -0.006 -0.001  0.018
## high.educ.blSome College                -0.060 -0.004 -0.115 -0.107  0.009
## high.educ.blHS Diploma/GED              -0.036  0.016 -0.135 -0.144  0.005
## high.educ.bl< HS Diploma                -0.026  0.009 -0.160 -0.079  0.005
## prnt.empl.blStay at Home Parent         -0.032  0.016  0.038  0.084  0.018
## prnt.empl.blUnemployed                  -0.016  0.004  0.013 -0.027  0.017
## prnt.empl.blOther                       -0.012  0.007  0.038 -0.013 -0.023
## neighb_phenx_avg_p.bl.cm                -0.065 -0.004  0.024  0.117  0.036
## overall.income.bl[>=50K & <100K]        -0.043  0.005 -0.103 -0.090 -0.025
## overall.income.bl[<50k]                 -0.026 -0.002 -0.126 -0.198 -0.062
## overall.income.bl[Don't Know or Refuse] -0.029  0.000 -0.113 -0.173 -0.067
## sex.blFemale                            -0.092  0.006 -0.005 -0.017 -0.009
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.441                            
## high.educ.blHS Diploma/GED               0.302  0.441                     
## high.educ.bl< HS Diploma                 0.260  0.398  0.356              
## prnt.empl.blStay at Home Parent         -0.037 -0.021 -0.046 -0.104       
## prnt.empl.blUnemployed                  -0.024 -0.016 -0.065 -0.130  0.112
## prnt.empl.blOther                       -0.028 -0.042 -0.025 -0.045  0.115
## neighb_phenx_avg_p.bl.cm                -0.005  0.046  0.033  0.047  0.008
## overall.income.bl[>=50K & <100K]        -0.140 -0.274 -0.161 -0.109 -0.018
## overall.income.bl[<50k]                 -0.113 -0.360 -0.324 -0.321 -0.027
## overall.income.bl[Don't Know or Refuse] -0.093 -0.279 -0.271 -0.273 -0.076
## sex.blFemale                             0.008  0.019  0.007 -0.013  0.002
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.116                            
## neighb_phenx_avg_p.bl.cm                 0.032 -0.020                     
## overall.income.bl[>=50K & <100K]        -0.010 -0.049  0.071              
## overall.income.bl[<50k]                 -0.077 -0.115  0.116  0.403       
## overall.income.bl[Don't Know or Refuse] -0.079 -0.108  0.074  0.364  0.489
## sex.blFemale                             0.030  0.009  0.029 -0.020 -0.015
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.000
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.6507402 -0.4949076  0.1660230  0.2645690 42.3666495 
## 
## Number of Observations: 23857
## Number of Groups: 21
anova(withdep_zinb_r)
##                          numDF denDF   F-value p-value
## (Intercept)                  1 14511 242.07620  <.0001
## interview_age.c9.y           1 14511 201.42562  <.0001
## race_ethnicity.bl            3  9309   8.11214  <.0001
## high.educ.bl                 4  9309  28.37207  <.0001
## prnt.empl.bl                 3  9309  18.09865  <.0001
## neighb_phenx_avg_p.bl.cm     1  9309  90.02855  <.0001
## overall.income.bl            3  9309  11.24198  <.0001
## sex.bl                       1  9309   5.97599  0.0145

Assumption checking for ZINB Models

#Check outlier/residuals with this df
withdep_res <- df_cc
withdep_res$level1_resid.raw <- residuals(withdep_zinb_r)
withdep_res$level1_resid.pearson <- residuals(withdep_zinb_r, type="pearson")
#Add predicted values (Yhat)
withdep_res$cbcl_scr_syn_withdep_r_predicted <- predict(withdep_zinb_r,withdep_res,type="response")
#Incidence
withdep_res$incidence <- estimate.probability(withdep_res$cbcl_scr_syn_withdep_r, method="empirical")

#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(withdep_res$level1_resid.pearson)

### Incidence vs. X’s Plots

#age
ggplot(withdep_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 7.4176e-05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.0709e-14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1.3755e-09
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at 0
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 7.4176e-05
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.0709e-14
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 1.3755e-09

### Residuals vs Y (CBCL Outcome) Plot

plot(withdep_res$level1_resid.pearson, withdep_res$cbcl_scr_syn_withdep_r)

### Residuals vs Yhat Plot

plot(withdep_res$level1_resid.pearson, withdep_res$cbcl_scr_syn_withdep_r_predicted)

### Residuals vs Row Plot

plot(as.numeric(rownames(withdep_res)),withdep_res$level1_resid.pearson)

### Residuals vs X’s Plots

#age
ggplot(withdep_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

Attention

CBCL + AP Longitudinal Models

attention_zinb_r <- glmm.zinb(cbcl_scr_syn_attention_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
              zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 14 
## Computational time: 2.015 minutes
summary(attention_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
##   Data: df_cc 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)
## StdDev:   0.1358172
## 
##  Formula: ~1 | subjectid %in% abcd_site
##         (Intercept)  Residual
## StdDev:    1.140061 0.9079128
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  cbcl_scr_syn_attention_r ~ interview_age.c9.y + race_ethnicity.bl +      high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm +      overall.income.bl + sex.bl 
##                                              Value  Std.Error    DF    t-value
## (Intercept)                              0.6590844 0.04349792 14511  15.152091
## interview_age.c9.y                      -0.0353176 0.00462549 14511  -7.635430
## race_ethnicity.blHispanic               -0.0593185 0.04124066  9309  -1.438350
## race_ethnicity.blBlack                  -0.0598611 0.04567308  9309  -1.310643
## race_ethnicity.blOther                   0.0607076 0.04192187  9309   1.448112
## high.educ.blBachelor                     0.1545420 0.03529460  9309   4.378632
## high.educ.blSome College                 0.1952206 0.04032712  9309   4.840926
## high.educ.blHS Diploma/GED               0.0643164 0.05661169  9309   1.136097
## high.educ.bl< HS Diploma                 0.0359965 0.07347570  9309   0.489911
## prnt.empl.blStay at Home Parent         -0.0557609 0.03592310  9309  -1.552229
## prnt.empl.blUnemployed                   0.1391686 0.05812674  9309   2.394227
## prnt.empl.blOther                        0.1869133 0.05117346  9309   3.652545
## neighb_phenx_avg_p.bl.cm                -0.1152415 0.01481732  9309  -7.777481
## overall.income.bl[>=50K & <100K]         0.0809051 0.03573678  9309   2.263918
## overall.income.bl[<50k]                  0.1529989 0.04473905  9309   3.419808
## overall.income.bl[Don't Know or Refuse]  0.0469490 0.05612350  9309   0.836530
## sex.blFemale                            -0.4363546 0.02617693  9309 -16.669433
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.1504
## race_ethnicity.blBlack                   0.1900
## race_ethnicity.blOther                   0.1476
## high.educ.blBachelor                     0.0000
## high.educ.blSome College                 0.0000
## high.educ.blHS Diploma/GED               0.2559
## high.educ.bl< HS Diploma                 0.6242
## prnt.empl.blStay at Home Parent          0.1206
## prnt.empl.blUnemployed                   0.0167
## prnt.empl.blOther                        0.0003
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0236
## overall.income.bl[<50k]                  0.0006
## overall.income.bl[Don't Know or Refuse]  0.4029
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.188                            
## race_ethnicity.blHispanic               -0.161  0.006                     
## race_ethnicity.blBlack                  -0.142  0.009  0.349              
## race_ethnicity.blOther                  -0.183  0.003  0.284  0.260       
## high.educ.blBachelor                    -0.280 -0.002 -0.016 -0.009  0.001
## high.educ.blSome College                -0.192  0.002 -0.107 -0.079 -0.021
## high.educ.blHS Diploma/GED              -0.119  0.006 -0.141 -0.144 -0.006
## high.educ.bl< HS Diploma                -0.078  0.001 -0.168 -0.075 -0.010
## prnt.empl.blStay at Home Parent         -0.136  0.008  0.043  0.091  0.016
## prnt.empl.blUnemployed                  -0.051  0.002  0.009 -0.042  0.009
## prnt.empl.blOther                       -0.060  0.004  0.041  0.009 -0.014
## neighb_phenx_avg_p.bl.cm                -0.140 -0.004  0.043  0.153  0.048
## overall.income.bl[>=50K & <100K]        -0.218  0.000 -0.092 -0.063 -0.012
## overall.income.bl[<50k]                 -0.136  0.000 -0.148 -0.188 -0.082
## overall.income.bl[Don't Know or Refuse] -0.120 -0.001 -0.101 -0.128 -0.059
## sex.blFemale                            -0.280  0.006 -0.006 -0.017 -0.016
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.460                            
## high.educ.blHS Diploma/GED               0.338  0.503                     
## high.educ.bl< HS Diploma                 0.265  0.411  0.379              
## prnt.empl.blStay at Home Parent         -0.026 -0.013 -0.050 -0.092       
## prnt.empl.blUnemployed                  -0.009 -0.010 -0.069 -0.099  0.148
## prnt.empl.blOther                       -0.011 -0.033 -0.013 -0.020  0.158
## neighb_phenx_avg_p.bl.cm                -0.005  0.062  0.058  0.057  0.027
## overall.income.bl[>=50K & <100K]        -0.173 -0.278 -0.175 -0.115 -0.032
## overall.income.bl[<50k]                 -0.161 -0.421 -0.370 -0.311 -0.055
## overall.income.bl[Don't Know or Refuse] -0.101 -0.255 -0.241 -0.220 -0.079
## sex.blFemale                             0.014  0.021  0.014 -0.003 -0.005
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.133                            
## neighb_phenx_avg_p.bl.cm                 0.021  0.002                     
## overall.income.bl[>=50K & <100K]        -0.016 -0.050  0.085              
## overall.income.bl[<50k]                 -0.102 -0.138  0.156  0.508       
## overall.income.bl[Don't Know or Refuse] -0.079 -0.099  0.088  0.363  0.488
## sex.blFemale                             0.019  0.020  0.030 -0.006 -0.008
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.006
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.5134447 -0.7118173 -0.2810833  0.4195582  3.8913914 
## 
## Number of Observations: 23857
## Number of Groups: 
##                abcd_site subjectid %in% abcd_site 
##                       21                     9345
summary(attention_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
##   Data: data 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)  Residual
## StdDev:   0.5725051 0.3970165
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl +      prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl +      sex.bl 
##                                             Value  Std.Error    DF   t-value
## (Intercept)                             -5.592195 0.14585374 23820 -38.34112
## interview_age.c9.y                       0.152881 0.02102163 23820   7.27254
## race_ethnicity.blHispanic                0.242567 0.07250235 23820   3.34564
## race_ethnicity.blBlack                   0.799979 0.06953310 23820  11.50501
## race_ethnicity.blOther                   0.456849 0.07149781 23820   6.38969
## high.educ.blBachelor                     0.065373 0.06619424 23820   0.98760
## high.educ.blSome College                 0.366416 0.07268933 23820   5.04085
## high.educ.blHS Diploma/GED               1.038075 0.08643505 23820  12.00988
## high.educ.bl< HS Diploma                 2.022974 0.09075926 23820  22.28945
## prnt.empl.blStay at Home Parent          0.185903 0.05483332 23820   3.39034
## prnt.empl.blUnemployed                  -0.379432 0.09197028 23820  -4.12560
## prnt.empl.blOther                       -0.139438 0.08591830 23820  -1.62292
## neighb_phenx_avg_p.bl.cm                 0.457926 0.02538887 23820  18.03647
## overall.income.bl[>=50K & <100K]        -0.367172 0.06789232 23820  -5.40816
## overall.income.bl[<50k]                 -0.056680 0.07559964 23820  -0.74974
## overall.income.bl[Don't Know or Refuse]  0.099041 0.08592882 23820   1.15259
## sex.blFemale                             0.744418 0.04397254 23820  16.92916
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.0008
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.0000
## high.educ.blBachelor                     0.3234
## high.educ.blSome College                 0.0000
## high.educ.blHS Diploma/GED               0.0000
## high.educ.bl< HS Diploma                 0.0000
## prnt.empl.blStay at Home Parent          0.0007
## prnt.empl.blUnemployed                   0.0000
## prnt.empl.blOther                        0.1046
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0000
## overall.income.bl[<50k]                  0.4534
## overall.income.bl[Don't Know or Refuse]  0.2491
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.293                            
## race_ethnicity.blHispanic               -0.114  0.016                     
## race_ethnicity.blBlack                  -0.115  0.035  0.471              
## race_ethnicity.blOther                  -0.130  0.011  0.376  0.370       
## high.educ.blBachelor                    -0.167 -0.009 -0.005  0.008  0.017
## high.educ.blSome College                -0.127  0.011 -0.100 -0.079 -0.009
## high.educ.blHS Diploma/GED              -0.101  0.025 -0.154 -0.146 -0.004
## high.educ.bl< HS Diploma                -0.092  0.016 -0.190 -0.113 -0.011
## prnt.empl.blStay at Home Parent         -0.088  0.024  0.051  0.102  0.038
## prnt.empl.blUnemployed                  -0.032  0.008  0.018 -0.032 -0.004
## prnt.empl.blOther                       -0.029  0.013  0.049 -0.008 -0.021
## neighb_phenx_avg_p.bl.cm                -0.104  0.001  0.025  0.128  0.032
## overall.income.bl[>=50K & <100K]        -0.090  0.007 -0.091 -0.100 -0.046
## overall.income.bl[<50k]                 -0.068  0.003 -0.161 -0.223 -0.102
## overall.income.bl[Don't Know or Refuse] -0.064  0.025 -0.131 -0.164 -0.087
## sex.blFemale                            -0.200  0.018  0.002 -0.016 -0.011
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.485                            
## high.educ.blHS Diploma/GED               0.416  0.618                     
## high.educ.bl< HS Diploma                 0.400  0.613  0.639              
## prnt.empl.blStay at Home Parent         -0.031 -0.016 -0.048 -0.108       
## prnt.empl.blUnemployed                  -0.018 -0.004 -0.060 -0.107  0.181
## prnt.empl.blOther                       -0.020 -0.019 -0.007 -0.047  0.176
## neighb_phenx_avg_p.bl.cm                -0.011  0.028  0.033  0.068  0.043
## overall.income.bl[>=50K & <100K]        -0.155 -0.311 -0.242 -0.210 -0.018
## overall.income.bl[<50k]                 -0.151 -0.432 -0.451 -0.464 -0.041
## overall.income.bl[Don't Know or Refuse] -0.116 -0.324 -0.359 -0.377 -0.090
## sex.blFemale                             0.007  0.009  0.008  0.000  0.017
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.139                            
## neighb_phenx_avg_p.bl.cm                 0.036 -0.014                     
## overall.income.bl[>=50K & <100K]        -0.009 -0.040  0.051              
## overall.income.bl[<50k]                 -0.050 -0.115  0.124  0.528       
## overall.income.bl[Don't Know or Refuse] -0.053 -0.098  0.072  0.433  0.650
## sex.blFemale                             0.036  0.010  0.021 -0.018 -0.008
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                            -0.018
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.8033304 -0.2751430 -0.1807364  0.1612190 39.7857867 
## 
## Number of Observations: 23857
## Number of Groups: 21
anova(attention_zinb_r)
##                          numDF denDF   F-value p-value
## (Intercept)                  1 14511 301.71089  <.0001
## interview_age.c9.y           1 14511  58.55577  <.0001
## race_ethnicity.bl            3  9309   4.71680  0.0027
## high.educ.bl                 4  9309  21.82550  <.0001
## prnt.empl.bl                 3  9309  11.49861  <.0001
## neighb_phenx_avg_p.bl.cm     1  9309  62.26850  <.0001
## overall.income.bl            3  9309   3.97771  0.0076
## sex.bl                       1  9309 277.87000  <.0001

Assumption checking for ZINB Models

#Check outlier/residuals with this df
attention_res <- df_cc
attention_res$level1_resid.raw <- residuals(attention_zinb_r)
attention_res$level1_resid.pearson <- residuals(attention_zinb_r, type="pearson")
#Add predicted values (Yhat)
attention_res$cbcl_scr_syn_attention_r_predicted <- predict(attention_zinb_r,attention_res,type="response")
#Incidence
attention_res$incidence <- estimate.probability(attention_res$cbcl_scr_syn_attention_r, method="empirical")

#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(attention_res$level1_resid.pearson)

### Incidence vs. X’s Plots

#age
ggplot(attention_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

### Residuals vs Y (CBCL Outcome) Plot

plot(attention_res$level1_resid.pearson, attention_res$cbcl_scr_syn_attention_r)

### Residuals vs Yhat Plot

plot(attention_res$level1_resid.pearson, attention_res$cbcl_scr_syn_attention_r_predicted)

### Residuals vs Row Plot

plot(as.numeric(rownames(attention_res)),attention_res$level1_resid.pearson)

### Residuals vs X’s Plots

#age
ggplot(attention_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

Rulebreak

CBCL + AP Longitudinal Models

rulebreak_zinb_r <- glmm.zinb(cbcl_scr_syn_rulebreak_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
              zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 15 
## Computational time: 2.045 minutes
summary(rulebreak_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
##   Data: df_cc 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)
## StdDev:   0.1243575
## 
##  Formula: ~1 | subjectid %in% abcd_site
##         (Intercept) Residual
## StdDev:    1.193925 0.791083
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  cbcl_scr_syn_rulebreak_r ~ interview_age.c9.y + race_ethnicity.bl +      high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm +      overall.income.bl + sex.bl 
##                                              Value  Std.Error    DF    t-value
## (Intercept)                             -0.5465811 0.04542784 14511 -12.031853
## interview_age.c9.y                      -0.0317480 0.00637374 14511  -4.981059
## race_ethnicity.blHispanic               -0.0764440 0.04559273  9309  -1.676670
## race_ethnicity.blBlack                   0.0895467 0.05001752  9309   1.790307
## race_ethnicity.blOther                   0.0799822 0.04670067  9309   1.712657
## high.educ.blBachelor                     0.1805187 0.03983589  9309   4.531561
## high.educ.blSome College                 0.3288013 0.04485652  9309   7.330066
## high.educ.blHS Diploma/GED               0.1888618 0.06238413  9309   3.027401
## high.educ.bl< HS Diploma                 0.2258188 0.08010467  9309   2.819046
## prnt.empl.blStay at Home Parent         -0.0333214 0.04008685  9309  -0.831230
## prnt.empl.blUnemployed                   0.1785418 0.06336153  9309   2.817827
## prnt.empl.blOther                        0.2555895 0.05601209  9309   4.563112
## neighb_phenx_avg_p.bl.cm                -0.1183948 0.01635967  9309  -7.236994
## overall.income.bl[>=50K & <100K]         0.1480522 0.04016668  9309   3.685946
## overall.income.bl[<50k]                  0.3286485 0.04956715  9309   6.630370
## overall.income.bl[Don't Know or Refuse]  0.2581262 0.06198526  9309   4.164316
## sex.blFemale                            -0.4248865 0.02923940  9309 -14.531300
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.0936
## race_ethnicity.blBlack                   0.0734
## race_ethnicity.blOther                   0.0868
## high.educ.blBachelor                     0.0000
## high.educ.blSome College                 0.0000
## high.educ.blHS Diploma/GED               0.0025
## high.educ.bl< HS Diploma                 0.0048
## prnt.empl.blStay at Home Parent          0.4059
## prnt.empl.blUnemployed                   0.0048
## prnt.empl.blOther                        0.0000
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0002
## overall.income.bl[<50k]                  0.0000
## overall.income.bl[Don't Know or Refuse]  0.0000
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.248                            
## race_ethnicity.blHispanic               -0.169  0.008                     
## race_ethnicity.blBlack                  -0.152  0.012  0.361              
## race_ethnicity.blOther                  -0.197  0.005  0.288  0.270       
## high.educ.blBachelor                    -0.308 -0.002 -0.020 -0.011  0.001
## high.educ.blSome College                -0.216  0.003 -0.107 -0.080 -0.021
## high.educ.blHS Diploma/GED              -0.136  0.008 -0.144 -0.148 -0.009
## high.educ.bl< HS Diploma                -0.089  0.001 -0.173 -0.079 -0.013
## prnt.empl.blStay at Home Parent         -0.146  0.011  0.041  0.093  0.016
## prnt.empl.blUnemployed                  -0.055  0.003  0.010 -0.040  0.010
## prnt.empl.blOther                       -0.065  0.005  0.047  0.015 -0.012
## neighb_phenx_avg_p.bl.cm                -0.149 -0.004  0.052  0.157  0.054
## overall.income.bl[>=50K & <100K]        -0.241 -0.002 -0.094 -0.068 -0.016
## overall.income.bl[<50k]                 -0.153 -0.001 -0.152 -0.190 -0.082
## overall.income.bl[Don't Know or Refuse] -0.136 -0.001 -0.106 -0.131 -0.059
## sex.blFemale                            -0.290  0.007 -0.009 -0.020 -0.016
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.473                            
## high.educ.blHS Diploma/GED               0.351  0.516                     
## high.educ.bl< HS Diploma                 0.279  0.424  0.389              
## prnt.empl.blStay at Home Parent         -0.030 -0.017 -0.051 -0.099       
## prnt.empl.blUnemployed                  -0.008 -0.008 -0.068 -0.100  0.152
## prnt.empl.blOther                       -0.013 -0.034 -0.016 -0.023  0.162
## neighb_phenx_avg_p.bl.cm                -0.005  0.061  0.055  0.054  0.032
## overall.income.bl[>=50K & <100K]        -0.171 -0.275 -0.175 -0.117 -0.028
## overall.income.bl[<50k]                 -0.161 -0.420 -0.368 -0.312 -0.051
## overall.income.bl[Don't Know or Refuse] -0.101 -0.259 -0.245 -0.223 -0.071
## sex.blFemale                             0.011  0.021  0.014 -0.003 -0.007
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.138                            
## neighb_phenx_avg_p.bl.cm                 0.025  0.004                     
## overall.income.bl[>=50K & <100K]        -0.015 -0.049  0.082              
## overall.income.bl[<50k]                 -0.102 -0.140  0.157  0.519       
## overall.income.bl[Don't Know or Refuse] -0.083 -0.101  0.087  0.375  0.502
## sex.blFemale                             0.014  0.017  0.032 -0.010 -0.011
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.004
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.9397639 -0.6403547 -0.4930191  0.3938430  3.9186439 
## 
## Number of Observations: 23857
## Number of Groups: 
##                abcd_site subjectid %in% abcd_site 
##                       21                     9345
summary(rulebreak_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
##   Data: data 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)  Residual
## StdDev:   0.4582038 0.2642232
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl +      prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl +      sex.bl 
##                                             Value  Std.Error    DF   t-value
## (Intercept)                             -4.740349 0.10647654 23820 -44.52013
## interview_age.c9.y                       0.252951 0.01080520 23820  23.41014
## race_ethnicity.blHispanic                0.213824 0.03460635 23820   6.17876
## race_ethnicity.blBlack                  -0.187430 0.04798726 23820  -3.90583
## race_ethnicity.blOther                  -0.037442 0.03567213 23820  -1.04961
## high.educ.blBachelor                    -0.255060 0.02776595 23820  -9.18606
## high.educ.blSome College                -0.656245 0.03794397 23820 -17.29510
## high.educ.blHS Diploma/GED              -0.140740 0.04999219 23820  -2.81525
## high.educ.bl< HS Diploma                 0.021875 0.05839111 23820   0.37463
## prnt.empl.blStay at Home Parent         -0.010609 0.02998149 23820  -0.35384
## prnt.empl.blUnemployed                  -0.145182 0.05869044 23820  -2.47370
## prnt.empl.blOther                       -0.051310 0.04952225 23820  -1.03610
## neighb_phenx_avg_p.bl.cm                 0.383487 0.01454822 23820  26.35971
## overall.income.bl[>=50K & <100K]        -0.276262 0.03061828 23820  -9.02278
## overall.income.bl[<50k]                 -0.015216 0.03935825 23820  -0.38659
## overall.income.bl[Don't Know or Refuse]  0.400206 0.04216651 23820   9.49108
## sex.blFemale                             1.206686 0.02455250 23820  49.14717
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.0000
## race_ethnicity.blBlack                   0.0001
## race_ethnicity.blOther                   0.2939
## high.educ.blBachelor                     0.0000
## high.educ.blSome College                 0.0000
## high.educ.blHS Diploma/GED               0.0049
## high.educ.bl< HS Diploma                 0.7079
## prnt.empl.blStay at Home Parent          0.7235
## prnt.empl.blUnemployed                   0.0134
## prnt.empl.blOther                        0.3002
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0000
## overall.income.bl[<50k]                  0.6991
## overall.income.bl[Don't Know or Refuse]  0.0000
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.211                            
## race_ethnicity.blHispanic               -0.060  0.016                     
## race_ethnicity.blBlack                  -0.050  0.023  0.283              
## race_ethnicity.blOther                  -0.062  0.002  0.288  0.178       
## high.educ.blBachelor                    -0.073 -0.013 -0.003 -0.005  0.011
## high.educ.blSome College                -0.039 -0.008 -0.127 -0.084  0.000
## high.educ.blHS Diploma/GED              -0.025  0.013 -0.183 -0.136  0.002
## high.educ.bl< HS Diploma                -0.015  0.004 -0.204 -0.083  0.010
## prnt.empl.blStay at Home Parent         -0.049  0.018  0.048  0.075  0.012
## prnt.empl.blUnemployed                  -0.022  0.006  0.001 -0.039  0.004
## prnt.empl.blOther                       -0.018  0.004  0.031 -0.015 -0.026
## neighb_phenx_avg_p.bl.cm                -0.076  0.002  0.028  0.098  0.023
## overall.income.bl[>=50K & <100K]        -0.061  0.007 -0.089 -0.057 -0.016
## overall.income.bl[<50k]                 -0.041  0.002 -0.132 -0.147 -0.066
## overall.income.bl[Don't Know or Refuse] -0.042  0.006 -0.097 -0.099 -0.068
## sex.blFemale                            -0.173  0.019 -0.003 -0.012 -0.015
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.342                            
## high.educ.blHS Diploma/GED               0.271  0.405                     
## high.educ.bl< HS Diploma                 0.239  0.380  0.405              
## prnt.empl.blStay at Home Parent         -0.043 -0.029 -0.074 -0.129       
## prnt.empl.blUnemployed                  -0.017 -0.015 -0.056 -0.100  0.133
## prnt.empl.blOther                       -0.029 -0.025  0.005 -0.028  0.139
## neighb_phenx_avg_p.bl.cm                -0.016  0.037  0.041  0.063  0.022
## overall.income.bl[>=50K & <100K]        -0.143 -0.220 -0.134 -0.091 -0.015
## overall.income.bl[<50k]                 -0.146 -0.381 -0.393 -0.359 -0.019
## overall.income.bl[Don't Know or Refuse] -0.094 -0.227 -0.263 -0.244 -0.083
## sex.blFemale                             0.004  0.004 -0.002 -0.011  0.001
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.100                            
## neighb_phenx_avg_p.bl.cm                 0.028  0.002                     
## overall.income.bl[>=50K & <100K]        -0.009 -0.052  0.090              
## overall.income.bl[<50k]                 -0.070 -0.133  0.120  0.384       
## overall.income.bl[Don't Know or Refuse] -0.060 -0.099  0.075  0.301  0.456
## sex.blFemale                             0.027  0.010  0.033 -0.015 -0.004
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.015
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.0022981 -0.4345395  0.1482323  0.2568752 45.6356384 
## 
## Number of Observations: 23857
## Number of Groups: 21
anova(rulebreak_zinb_r)
##                          numDF denDF   F-value p-value
## (Intercept)                  1 14511 192.14711  <.0001
## interview_age.c9.y           1 14511  26.83750  <.0001
## race_ethnicity.bl            3  9309  30.95390  <.0001
## high.educ.bl                 4  9309  50.04633  <.0001
## prnt.empl.bl                 3  9309  16.78715  <.0001
## neighb_phenx_avg_p.bl.cm     1  9309  62.56278  <.0001
## overall.income.bl            3  9309  14.39912  <.0001
## sex.bl                       1  9309 211.15868  <.0001

Assumption checking for ZINB Models

#Check outlier/residuals with this df
rulebreak_res <- df_cc
rulebreak_res$level1_resid.raw <- residuals(rulebreak_zinb_r)
rulebreak_res$level1_resid.pearson <- residuals(rulebreak_zinb_r, type="pearson")
#Add predicted values (Yhat)
rulebreak_res$cbcl_scr_syn_rulebreak_r_predicted <- predict(rulebreak_zinb_r,rulebreak_res,type="response")
#Incidence
rulebreak_res$incidence <- estimate.probability(rulebreak_res$cbcl_scr_syn_rulebreak_r, method="empirical")

#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(rulebreak_res$level1_resid.pearson)

### Incidence vs. X’s Plots

#age
ggplot(rulebreak_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 7.303e-05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.3261e-14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1.3333e-09
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at 0
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 7.303e-05
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.3261e-14
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 1.3333e-09

### Residuals vs Y (CBCL Outcome) Plot

plot(rulebreak_res$level1_resid.pearson, rulebreak_res$cbcl_scr_syn_rulebreak_r)

### Residuals vs Yhat Plot

plot(rulebreak_res$level1_resid.pearson, rulebreak_res$cbcl_scr_syn_rulebreak_r_predicted)

### Residuals vs Row Plot

plot(as.numeric(rownames(rulebreak_res)),rulebreak_res$level1_resid.pearson)

### Residuals vs X’s Plots

#age
ggplot(rulebreak_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

Aggressive

CBCL + AP Longitudinal Models

aggressive_zinb_r <- glmm.zinb(cbcl_scr_syn_aggressive_r ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, random = ~1|abcd_site/subjectid,
              zi_fixed = ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl+ prnt.empl.bl  + neighb_phenx_avg_p.bl.cm + overall.income.bl + sex.bl, zi_random = ~1|abcd_site, data = df_cc)
## Computational iterations: 11 
## Computational time: 1.444 minutes
summary(aggressive_zinb_r)
## Linear mixed-effects model fit by maximum likelihood
##   Data: df_cc 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)
## StdDev:   0.1324439
## 
##  Formula: ~1 | subjectid %in% abcd_site
##         (Intercept)  Residual
## StdDev:     1.14606 0.9764887
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  cbcl_scr_syn_aggressive_r ~ interview_age.c9.y + race_ethnicity.bl +      high.educ.bl + prnt.empl.bl + neighb_phenx_avg_p.bl.cm +      overall.income.bl + sex.bl 
##                                              Value  Std.Error    DF   t-value
## (Intercept)                              0.6339320 0.04339081 14511 14.609821
## interview_age.c9.y                      -0.0440928 0.00491956 14511 -8.962753
## race_ethnicity.blHispanic               -0.0618377 0.04166748  9309 -1.484075
## race_ethnicity.blBlack                  -0.1962362 0.04651336  9309 -4.218922
## race_ethnicity.blOther                  -0.0882120 0.04277408  9309 -2.062276
## high.educ.blBachelor                     0.0878093 0.03580335  9309  2.452545
## high.educ.blSome College                 0.1513259 0.04084311  9309  3.705052
## high.educ.blHS Diploma/GED               0.0255209 0.05735353  9309  0.444976
## high.educ.bl< HS Diploma                 0.0876659 0.07376062  9309  1.188519
## prnt.empl.blStay at Home Parent          0.0249738 0.03624331  9309  0.689060
## prnt.empl.blUnemployed                   0.2126384 0.05879142  9309  3.616827
## prnt.empl.blOther                        0.1821335 0.05190958  9309  3.508668
## neighb_phenx_avg_p.bl.cm                -0.1234312 0.01499053  9309 -8.233944
## overall.income.bl[>=50K & <100K]         0.1213684 0.03617907  9309  3.354659
## overall.income.bl[<50k]                  0.2465593 0.04523913  9309  5.450134
## overall.income.bl[Don't Know or Refuse]  0.1352556 0.05686503  9309  2.378537
## sex.blFemale                            -0.2496839 0.02648675  9309 -9.426747
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.1378
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.0392
## high.educ.blBachelor                     0.0142
## high.educ.blSome College                 0.0002
## high.educ.blHS Diploma/GED               0.6563
## high.educ.bl< HS Diploma                 0.2347
## prnt.empl.blStay at Home Parent          0.4908
## prnt.empl.blUnemployed                   0.0003
## prnt.empl.blOther                        0.0005
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0008
## overall.income.bl[<50k]                  0.0000
## overall.income.bl[Don't Know or Refuse]  0.0174
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.200                            
## race_ethnicity.blHispanic               -0.160  0.007                     
## race_ethnicity.blBlack                  -0.140  0.010  0.349              
## race_ethnicity.blOther                  -0.182  0.004  0.284  0.258       
## high.educ.blBachelor                    -0.280 -0.001 -0.021 -0.015 -0.002
## high.educ.blSome College                -0.193  0.003 -0.112 -0.086 -0.026
## high.educ.blHS Diploma/GED              -0.120  0.007 -0.145 -0.148 -0.010
## high.educ.bl< HS Diploma                -0.077  0.001 -0.173 -0.081 -0.014
## prnt.empl.blStay at Home Parent         -0.139  0.009  0.042  0.092  0.018
## prnt.empl.blUnemployed                  -0.053  0.003  0.008 -0.042  0.011
## prnt.empl.blOther                       -0.061  0.003  0.040  0.010 -0.011
## neighb_phenx_avg_p.bl.cm                -0.140 -0.004  0.045  0.152  0.048
## overall.income.bl[>=50K & <100K]        -0.223 -0.001 -0.090 -0.062 -0.011
## overall.income.bl[<50k]                 -0.141 -0.001 -0.147 -0.185 -0.081
## overall.income.bl[Don't Know or Refuse] -0.124 -0.001 -0.100 -0.126 -0.056
## sex.blFemale                            -0.288  0.005 -0.009 -0.019 -0.019
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.460                            
## high.educ.blHS Diploma/GED               0.339  0.502                     
## high.educ.bl< HS Diploma                 0.269  0.414  0.381              
## prnt.empl.blStay at Home Parent         -0.031 -0.016 -0.050 -0.096       
## prnt.empl.blUnemployed                  -0.010 -0.011 -0.069 -0.099  0.149
## prnt.empl.blOther                       -0.014 -0.033 -0.014 -0.020  0.159
## neighb_phenx_avg_p.bl.cm                -0.007  0.061  0.056  0.053  0.029
## overall.income.bl[>=50K & <100K]        -0.176 -0.276 -0.174 -0.117 -0.028
## overall.income.bl[<50k]                 -0.161 -0.418 -0.367 -0.312 -0.053
## overall.income.bl[Don't Know or Refuse] -0.101 -0.254 -0.241 -0.221 -0.076
## sex.blFemale                             0.014  0.021  0.014 -0.005 -0.006
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.134                            
## neighb_phenx_avg_p.bl.cm                 0.024  0.004                     
## overall.income.bl[>=50K & <100K]        -0.014 -0.049  0.083              
## overall.income.bl[<50k]                 -0.101 -0.138  0.157  0.507       
## overall.income.bl[Don't Know or Refuse] -0.078 -0.098  0.087  0.362  0.488
## sex.blFemale                             0.019  0.017  0.027 -0.005 -0.006
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.008
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -3.4103429 -0.7122797 -0.3411881  0.4042947  4.0404707 
## 
## Number of Observations: 23857
## Number of Groups: 
##                abcd_site subjectid %in% abcd_site 
##                       21                     9345
summary(aggressive_zinb_r$zi.fit)
## Linear mixed-effects model fit by maximum likelihood
##   Data: data 
##   AIC BIC logLik
##    NA  NA     NA
## 
## Random effects:
##  Formula: ~1 | abcd_site
##         (Intercept)  Residual
## StdDev:   0.2788267 0.4684907
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Fixed effects:  zp ~ interview_age.c9.y + race_ethnicity.bl + high.educ.bl +      prnt.empl.bl + neighb_phenx_avg_p.bl.cm + overall.income.bl +      sex.bl 
##                                             Value  Std.Error    DF   t-value
## (Intercept)                             -4.335767 0.08501125 23820 -51.00227
## interview_age.c9.y                       0.245121 0.01808395 23820  13.55460
## race_ethnicity.blHispanic                0.379047 0.05914867 23820   6.40837
## race_ethnicity.blBlack                   0.742765 0.06116242 23820  12.14415
## race_ethnicity.blOther                   0.474197 0.05661694 23820   8.37553
## high.educ.blBachelor                     0.133514 0.04983392 23820   2.67917
## high.educ.blSome College                 0.230652 0.05826166 23820   3.95889
## high.educ.blHS Diploma/GED               0.375390 0.08003361 23820   4.69040
## high.educ.bl< HS Diploma                 1.003641 0.08724317 23820  11.50395
## prnt.empl.blStay at Home Parent         -0.019367 0.05161527 23820  -0.37521
## prnt.empl.blUnemployed                   0.223883 0.07382609 23820   3.03258
## prnt.empl.blOther                       -0.156931 0.07930262 23820  -1.97888
## neighb_phenx_avg_p.bl.cm                 0.259068 0.02210036 23820  11.72235
## overall.income.bl[>=50K & <100K]        -0.293459 0.05194948 23820  -5.64894
## overall.income.bl[<50k]                 -0.518706 0.06518059 23820  -7.95799
## overall.income.bl[Don't Know or Refuse]  0.050936 0.07159839 23820   0.71141
## sex.blFemale                             0.203112 0.03649427 23820   5.56559
##                                         p-value
## (Intercept)                              0.0000
## interview_age.c9.y                       0.0000
## race_ethnicity.blHispanic                0.0000
## race_ethnicity.blBlack                   0.0000
## race_ethnicity.blOther                   0.0000
## high.educ.blBachelor                     0.0074
## high.educ.blSome College                 0.0001
## high.educ.blHS Diploma/GED               0.0000
## high.educ.bl< HS Diploma                 0.0000
## prnt.empl.blStay at Home Parent          0.7075
## prnt.empl.blUnemployed                   0.0024
## prnt.empl.blOther                        0.0478
## neighb_phenx_avg_p.bl.cm                 0.0000
## overall.income.bl[>=50K & <100K]         0.0000
## overall.income.bl[<50k]                  0.0000
## overall.income.bl[Don't Know or Refuse]  0.4768
## sex.blFemale                             0.0000
##  Correlation: 
##                                         (Intr) in_.9. rc_t.H rc_t.B rc_t.O
## interview_age.c9.y                      -0.444                            
## race_ethnicity.blHispanic               -0.158  0.017                     
## race_ethnicity.blBlack                  -0.150  0.035  0.404              
## race_ethnicity.blOther                  -0.177  0.005  0.340  0.307       
## high.educ.blBachelor                    -0.219 -0.004 -0.004 -0.001  0.019
## high.educ.blSome College                -0.153  0.007 -0.115 -0.092 -0.003
## high.educ.blHS Diploma/GED              -0.105  0.023 -0.143 -0.146  0.005
## high.educ.bl< HS Diploma                -0.085  0.016 -0.173 -0.094  0.001
## prnt.empl.blStay at Home Parent         -0.111  0.021  0.050  0.102  0.021
## prnt.empl.blUnemployed                  -0.051  0.010  0.018 -0.028  0.014
## prnt.empl.blOther                       -0.039  0.011  0.043  0.002 -0.020
## neighb_phenx_avg_p.bl.cm                -0.139  0.002  0.029  0.140  0.034
## overall.income.bl[>=50K & <100K]        -0.129  0.000 -0.101 -0.090 -0.024
## overall.income.bl[<50k]                 -0.078 -0.002 -0.138 -0.206 -0.077
## overall.income.bl[Don't Know or Refuse] -0.081  0.013 -0.111 -0.147 -0.063
## sex.blFemale                            -0.235  0.015 -0.004 -0.023 -0.012
##                                         hgh..B hg..SC h..HSD h..<HD p..aHP
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                 0.452                            
## high.educ.blHS Diploma/GED               0.340  0.507                     
## high.educ.bl< HS Diploma                 0.317  0.491  0.475              
## prnt.empl.blStay at Home Parent         -0.030 -0.018 -0.051 -0.113       
## prnt.empl.blUnemployed                  -0.015 -0.009 -0.079 -0.129  0.175
## prnt.empl.blOther                       -0.020 -0.030 -0.011 -0.031  0.145
## neighb_phenx_avg_p.bl.cm                -0.001  0.054  0.048  0.067  0.028
## overall.income.bl[>=50K & <100K]        -0.154 -0.292 -0.188 -0.149 -0.022
## overall.income.bl[<50k]                 -0.138 -0.397 -0.379 -0.390 -0.047
## overall.income.bl[Don't Know or Refuse] -0.105 -0.291 -0.293 -0.314 -0.091
## sex.blFemale                             0.012  0.017  0.011 -0.004  0.004
##                                         prn..U prn..O n___.. o..[&< o..[<5
## interview_age.c9.y                                                        
## race_ethnicity.blHispanic                                                 
## race_ethnicity.blBlack                                                    
## race_ethnicity.blOther                                                    
## high.educ.blBachelor                                                      
## high.educ.blSome College                                                  
## high.educ.blHS Diploma/GED                                                
## high.educ.bl< HS Diploma                                                  
## prnt.empl.blStay at Home Parent                                           
## prnt.empl.blUnemployed                                                    
## prnt.empl.blOther                        0.141                            
## neighb_phenx_avg_p.bl.cm                 0.035 -0.007                     
## overall.income.bl[>=50K & <100K]        -0.017 -0.046  0.077              
## overall.income.bl[<50k]                 -0.097 -0.124  0.137  0.474       
## overall.income.bl[Don't Know or Refuse] -0.098 -0.113  0.093  0.390  0.554
## sex.blFemale                             0.035  0.011  0.024  0.001  0.000
##                                         o..KoR
## interview_age.c9.y                            
## race_ethnicity.blHispanic                     
## race_ethnicity.blBlack                        
## race_ethnicity.blOther                        
## high.educ.blBachelor                          
## high.educ.blSome College                      
## high.educ.blHS Diploma/GED                    
## high.educ.bl< HS Diploma                      
## prnt.empl.blStay at Home Parent               
## prnt.empl.blUnemployed                        
## prnt.empl.blOther                             
## neighb_phenx_avg_p.bl.cm                      
## overall.income.bl[>=50K & <100K]              
## overall.income.bl[<50k]                       
## overall.income.bl[Don't Know or Refuse]       
## sex.blFemale                             0.001
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.1537437 -0.3647711 -0.2745072  0.2188908 19.0696631 
## 
## Number of Observations: 23857
## Number of Groups: 21
anova(aggressive_zinb_r)
##                          numDF denDF  F-value p-value
## (Intercept)                  1 14511 354.3314  <.0001
## interview_age.c9.y           1 14511  81.9047  <.0001
## race_ethnicity.bl            3  9309   2.5172  0.0563
## high.educ.bl                 4  9309  21.7835  <.0001
## prnt.empl.bl                 3  9309  13.0736  <.0001
## neighb_phenx_avg_p.bl.cm     1  9309  79.9245  <.0001
## overall.income.bl            3  9309   9.8816  <.0001
## sex.bl                       1  9309  88.8636  <.0001

Assumption checking for ZINB Models

#Check outlier/residuals with this df
aggressive_res <- df_cc
aggressive_res$level1_resid.raw <- residuals(aggressive_zinb_r)
aggressive_res$level1_resid.pearson <- residuals(aggressive_zinb_r, type="pearson")
#Add predicted values (Yhat)
aggressive_res$cbcl_scr_syn_aggressive_r_predicted <- predict(aggressive_zinb_r,aggressive_res,type="response")
#Incidence
aggressive_res$incidence <- estimate.probability(aggressive_res$cbcl_scr_syn_aggressive_r, method="empirical")

#Plotting histogram of residuals, but may be skewed since using ZINB, so make sure to check below plots
hist(aggressive_res$level1_resid.pearson)

### Incidence vs. X’s Plots

#age
ggplot(aggressive_res,aes(incidence,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

### Residuals vs Y (CBCL Outcome) Plot

plot(aggressive_res$level1_resid.pearson, aggressive_res$cbcl_scr_syn_aggressive_r)

### Residuals vs Yhat Plot

plot(aggressive_res$level1_resid.pearson, aggressive_res$cbcl_scr_syn_aggressive_r_predicted)

### Residuals vs Row Plot

plot(as.numeric(rownames(aggressive_res)),aggressive_res$level1_resid.pearson)

### Residuals vs X’s Plots

#age
ggplot(aggressive_res,aes(level1_resid.pearson,interview_age)) + geom_point(color = "black") + geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

#take snapshot for environment dependencies
renv::snapshot()
#update if running again
renv::update()